###########################################
#  Primary Divisions: How Voters Evaluate Policy and Group Differences in Intra-Party Contests
#   - Forthcoming at The Journal of Politics
#   - Henderson et al 2021
#
###########################################
#  - code by S. Goggin & J. Henderson
########################################################
# This file produces figures for candidate choice by levels of political knowledge
########################################################

#dirs="~/Dropbox/replication0/"
#dirs should be set here or in runR.R

# Clear the workspace but keep `dirs`, which is passed to setwd() below.
rm(list = setdiff(ls(), 'dirs'))

library(ggplot2)
library(stringr)

# Reorder the rows of `xmat` within each label group by one estimate column.
#
# Arguments:
#   xmat          matrix or data frame with one row per label
#   labels        row labels, parallel to the rows of xmat
#   label.groups  group of each label, parallel to labels
#   omits         labels (exact strings) of the baseline levels; these rows
#                 are never moved
#   o.column      index or name of the single column of xmat to sort on
#
# Returns xmat with its rows permuted: in every group that has more than two
# rows, the non-omitted rows are rearranged in increasing order of the sort
# column, re-using the row slots they already occupied. Groups with one or two
# rows are returned unchanged.
lableOrder=function(xmat,labels,label.groups,omits,o.column){

	# Baseline rows are identified by exact label match. (Tagging them with an
	# 'omit' prefix and pattern-matching on 'omit' would also freeze any label
	# that merely contains that substring.)
	is_omitted=as.character(labels) %in% as.character(omits)

	# break groups into levels
	un_group=unique(label.groups)

	# this is the item to sort on, typically global or independent
	xm=xmat[,o.column]

	# vector which will contain row order (starts as the identity permutation)
	xo=seq_along(xm)

	# rearranging row order within level
	for(j in seq_along(un_group)){
		ix=which(label.groups==un_group[j])
		if(length(ix)>2){
			ix=ix[!is_omitted[ix]]
			# groups do not overlap, so xo[ix] is still ix at this point
			xo[ix]=ix[order(xm[ix])]
		}
	}
	return(xmat[xo,])
}

# Return the rows of `x` arranged in the order given by `o`.
#   x  data frame with an `iv_order` column holding one key per row
#   o  vector of keys in the desired row order
# Row i of the result is the (first) row of x whose iv_order equals o[i]; the
# result has exactly length(o) rows. Keys are compared as character strings,
# so factors with different level orderings are handled. Stops with an
# explicit message if a key in `o` does not occur in x$iv_order.
reOrder=function(x,o){
	ix=match(as.character(o),as.character(x$iv_order))
	if(anyNA(ix)){
		stop('reOrder: value(s) not found in x$iv_order: ',
			paste(unique(as.character(o)[is.na(ix)]),collapse=', '))
	}
	return(x[ix,])
}

# All relative paths below are resolved against `dirs`.
setwd(dirs)
load("data/vdl.cces_stacked_unmatched.Rdata")


###########################################
###First, need to stack based on candidates, not just candidate pairs (and also get text out for labels later)

#This has leaners as independents, which is incorrect
#cces_stacked$pid3clean <- ifelse(cces_stacked$pid3=="Democrat",-1,ifelse(cces_stacked$pid3=="Republican",1,0))

library(car)


# `data_matrix` is presumably supplied by this .Rdata file; the rest of the
# script works on it under the name `candidate_matrix`.
load('data/vdl.data_matrix_scored.Rdata')
candidate_matrix <- data_matrix
#read.csv("csv/candidate_matrix_scored.csv",header=T,stringsAsFactors=F)[,-c(1)]

# knowledge/sophistication/ideologue indices (`indices` is presumably
# supplied by this .Rdata file)
load('data/sophistication_indices.Rdata')

# three-level knowledge: 6-7 (H), 3-5 (M), 0-2 (L)
L_ix2 <- indices$resp_id[indices$"know0to2"]
#M_ix2=indices$resp_id[indices$"M_ix2"]
H_ix2 <- indices$resp_id[indices$"know67"]

# Every row starts as 'M'; rows whose respondent id is in the low list become
# 'L', then rows whose id is in the high list become 'H' (so 'H' wins if an
# id appears in both). NA ids in the lists are ignored.
candidate_matrix$sophisticated_level <- 'M'
candidate_matrix$sophisticated_level[
	candidate_matrix$respondent %in% L_ix2[!is.na(L_ix2)]] <- 'L'
candidate_matrix$sophisticated_level[
	candidate_matrix$respondent %in% H_ix2[!is.na(H_ix2)]] <- 'H'

# Rows with pid == 0 get no knowledge level.
# NOTE(review): this uses `pid`, while the party subsets further down use
# `pid3` -- confirm that `pid` is a real column (with `$`, partial matching
# could otherwise resolve it to a different column or to NULL).
candidate_matrix$sophisticated_level[which(candidate_matrix$pid==0)] <- NA


###########################################
###Then, produce models w/ Clustered SEs

#Function adapted from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
#
# Coefficient tests for a fitted model using a cluster-robust covariance.
#   dat      data frame the model was fitted on. Kept for backward
#            compatibility; it is not used for name lookup, so a column of
#            dat called `cluster` can no longer shadow the argument.
#   fm       fitted model object (the script passes lm fits)
#   cluster  vector of cluster identifiers, one per row of dat
# Returns the result of lmtest::coeftest(fm, <clustered covariance>).
# Stops if the sandwich or lmtest package is not installed (require() would
# only warn and then fail later on a missing function).
clse.f <- function(dat,fm, cluster){
 for(pkg in c('sandwich','lmtest')){
  if(!requireNamespace(pkg, quietly=TRUE)){
   stop("clse.f needs the '",pkg,"' package to be installed", call.=FALSE)
  }
 }

 # rows dropped by the model's na.action must be dropped from cluster too
 not <- attr(fm$model,"na.action")
 if( ! is.null(not)){
  cluster <- cluster[-not]
 }

 ef <- sandwich::estfun(fm)
 if(length(cluster)!=nrow(ef)){
  stop('clse.f: length of cluster (',length(cluster),
   ') does not match the number of observations used in the model (',
   nrow(ef),')', call.=FALSE)
 }

 M <- length(unique(cluster))   # number of clusters
 N <- length(cluster)           # number of observations
 K <- fm$rank                   # number of estimated parameters
 dfc <- (M/(M-1))*((N-1)/(N-K)) # finite-sample correction
 # sum the estimating functions within cluster, column by column
 uj <- apply(ef,2, function(x) tapply(x, cluster, sum))
 vcovCL <- dfc*sandwich::sandwich(fm, meat=crossprod(uj)/N)
 lmtest::coeftest(fm, vcovCL)
}

############################################################

# Knowledge level cross-tabulated against the valid_primary / valid_general
# columns. table() sorts the levels (H, L, M when all three occur); the row
# index c(1,3,2) puts them in H, M, L order.
know_by_primary <- table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),]
know_by_general <- table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),]

know_by_primary
know_by_general

# Knowledge shares within the second column of the valid_primary table
# (the numbers in the comments are presumably output recorded from a run)
know_by_primary[,2]/sum(know_by_primary[,2])
#H         M         L
#0.4748879 0.3721973 0.1529148

# ... and within its first column
know_by_primary[,1]/sum(know_by_primary[,1])
#H         M         L
#0.2746807 0.3492111 0.3761082

# Same two shares for the valid_general table
know_by_general[,2]/sum(know_by_general[,2])
#H         M         L
#0.4033544 0.3656860 0.2309596

know_by_general[,1]/sum(know_by_general[,1])
#H         M         L
#0.2705793 0.3475610 0.3818598

############################################################
############################################################
###Stratify by H,M,L overall

# High-knowledge ('H') rows only, restricted to conjoints 3 and 8.
vote <- subset(candidate_matrix,candidate_matrix$sophisticated_level=='H' & (candidate_matrix$conjoints==3|candidate_matrix$conjoints==8))
# pty: 0 for conjoint 3, 1 for conjoint 8
vote$pty <- ifelse(vote$conjoints==3,0,1)

reps <- subset(vote, vote$pid3==1)
dems <- subset(vote, vote$pid3==-1)
# NOTE(review): `inds` uses the same filter as `reps` (pid3==1), so the
# "independent" model below duplicates the Republican one. The Independent
# rows are removed before plotting, so the figure does not show them; confirm
# whether pid3==0 was intended or whether this is a deliberate placeholder.
inds <- subset(vote, vote$pid3==1)


#dv_choice is the vote choice variable
# pty is included for independents here, otherwise of course NAs out.

# One formula shared by all three models. The models are fitted with an
# explicit `data=` argument instead of attach()/detach(), so every variable
# (including `wt` and the clustering id) is taken from the intended subset and
# cannot be shadowed by a global object of the same name.
choice_formula <- dv_choice~
pty + g_female+
re_black+re_hispanic+
r_catholic+r_evangelical+r_protestant+
o_ceo+o_citycouncil+o_factoryforeman+o_farmer+o_usarmymajor+o_politicalstaffer+o_smallbizowner+o_stateleg+o_teacher+
p_compassionate+p_empathetic+p_inspiring+p_intelligent+p_knowledgeable+p_moral+p_strongleader+
e_business+e_christian+e_civilrights+e_energy+e_environment+e_guncontrol+e_gunrights+e_laborunions+e_reproductive+e_taxreform+e_teaparty+e_veterans+
rec_refuse+rec_secure+rec_stand+rec_work+
i_raisetaxes+i_cuttaxes+i_lgbt+i_marriage+i_drilling+i_need+i_govabuse+i_righttochoose+i_gunrights+i_unfairtrade+i_unbornlives+i_citizenship+i_reducemilitary+i_policing+i_co2emissions+i_bordersecurity+i_guncontrol+i_strengthenmilitary+i_criminals

# Weighted linear models (weights = 1/wt), each followed by coefficient
# tests with standard errors clustered on respondent.
primary_elec_reps <- lm(choice_formula, data=reps, weights=1/wt)
primary_elec_reps_clse <- clse.f(reps,primary_elec_reps,reps$respondent)

primary_elec_dems <- lm(choice_formula, data=dems, weights=1/wt)
primary_elec_dems_clse <- clse.f(dems,primary_elec_dems,dems$respondent)

primary_elec_inds <- lm(choice_formula, data=inds, weights=1/wt)
primary_elec_inds_clse <- clse.f(inds,primary_elec_inds,inds$respondent)

#Now mashing it all together for ggplot

# Point estimates, one column per model (dems, inds, reps), minus the pty row.
results_matrix <- cbind(coef(primary_elec_dems),coef(primary_elec_inds),coef(primary_elec_reps))
results_matrix <- results_matrix[-2,]

# Clustered standard errors (column 2 of each coeftest table), looked up by
# coefficient name; a coefficient with no row in the table gets NA.
coef_names <- rownames(results_matrix)
clustered_se <- function(ct){
	se <- ct[,2]
	unname(se[match(coef_names,names(se))])
}
results_matrix <- cbind(results_matrix,
	clustered_se(primary_elec_dems_clse),
	clustered_se(primary_elec_inds_clse),
	clustered_se(primary_elec_reps_clse))

# interleave: estimate, se for each of D, I, R
results_matrix <- results_matrix[,c(1,4,2,5,3,6)]
colnames(results_matrix) <- c("d_estimate","d_se","i_estimate","i_se","r_estimate","r_se")

# drop the intercept (row 1), leaving the 57 attribute-level coefficients
results_matrix <- results_matrix[2:58,]

# character matrix: coefficient name in column `var`, rows labelled 1..57
results_matrix_withnames <- cbind(var=rownames(results_matrix),results_matrix)
rownames(results_matrix_withnames) <- seq_len(57)

#results_matrix_withnames[is.na(results_matrix_withnames)]<- 0
##Now inserting rows for the omitted levels

#omitted (baseline) levels: male, white, no religion, attorney, decent, newspaper endorsements, record = helping constituents, issue = promoting free trade

# A baseline level enters the plot as a row of zeros: its name followed by
# zero for each of the six estimate / se columns.
baseline_row <- function(tag) c(tag,rep(0,6))

g_male       <- baseline_row("g_male")
re_white     <- baseline_row("re_white")
r_none       <- baseline_row("r_none")
o_attorney   <- baseline_row("o_attorney")
p_decent     <- baseline_row("p_decent")
e_newspapers <- baseline_row("e_newspapers")
rec_help     <- baseline_row("rec_help")
#i_raisetaxes <- c("i1_raisetaxes",rep(0,6))
i_freetrade  <- baseline_row("i1_freetrade")

# Put each baseline row directly above the estimated rows of its group.
full_matrix <- rbind(
	g_male,       results_matrix_withnames[1,,drop=FALSE],
	re_white,     results_matrix_withnames[2:3,],
	r_none,       results_matrix_withnames[4:6,],
	o_attorney,   results_matrix_withnames[7:15,],
	p_decent,     results_matrix_withnames[16:22,],
	e_newspapers, results_matrix_withnames[23:34,],
	rec_help,     results_matrix_withnames[35:38,],
	i_freetrade,  results_matrix_withnames[39:57,]
)

# Move the name column into the row names, then convert the remaining
# character columns back to numbers.
rownames(full_matrix) <- full_matrix[,1]
full_matrix <- full_matrix[,-1]
full_matrix_clean <- apply(full_matrix,2,as.numeric)
rownames(full_matrix_clean) <- rownames(full_matrix)

# Display label for every row of full_matrix_clean, in the same row order.
# Each label has the form "<group> - <level>".
labels <- c(
	"Gender - Male",
	"Gender - Female",
	"Race - White",
	"Race - Black",
	"Race - Hispanic",
	"Religion - None",
	"Religion - Catholic",
	"Religion - Evangelical Protestant",
	"Religion - Protestant",
	"Occupation - Attorney",
	"Occupation - CEO",
	"Occupation - City Council Member",
	"Occupation - Factory Foreman",
	"Occupation - Farmer",
	"Occupation - Former US Army Major",
	"Occupation - Political Staffer",
	"Occupation - Small Business Owner",
	"Occupation - State Legislator",
	"Occupation - Teacher",
	"Personality - Decent",
	"Personality - Compassionate",
	"Personality - Empathetic",
	"Personality - Inspiring",
	"Personality - Intelligent",
	"Personality - Knowledgeable",
	"Personality - Moral",
	"Personality - Strong Leader",
	"Endorsements - Major area newspapers",
	"Endorsements - Business groups",
	"Endorsements - Christian groups",
	"Endorsements - Civil rights groups",
	"Endorsements - Energy groups",
	"Endorsements - Environmental groups",
	"Endorsements - Gun control groups",
	"Endorsements - Gun rights groups",
	"Endorsements - Labor unions",
	"Endorsements - Reproductive rights groups",
	"Endorsements - Tax reform groups",
	"Endorsements - Tea Party groups",
	"Endorsements - Veterans groups",
	"Record - Help my constituents get the benefits they deserve",
	"Record - Refuse to compromise my principles even when it means taking on my party",
	"Record - Secure appointment to a powerful legislative committee",
	"Record - Stand with my party to do what's right",
	"Record - Work across the aisle to get things done",
	"Issue - Promote expanding free trade agreements",
	"Issue - Raise taxes on those making more than $250,000 a year",
	"Issue - Cut taxes on income and capital gains for all",
	"Issue - Defend the rights of LGBT individuals",
	"Issue - Defend traditional marriage and religious beliefs",
	"Issue - Expand domestic oil and gas production through drilling",
	"Issue - Expand government and unemployment assistance for those in need",
	"Issue - Prevent and prosecute abuse of government assistance programs",
	"Issue - Protect a woman's right to choose",
	"Issue - Protect gun owners' rights to defend themselves and others",
	"Issue - Protect jobs and industry from unfair foreign trade",
	"Issue - Protect the lives of the unborn",
	"Issue - Provide a path to citizenship for undocumented immigrants",
	"Issue - Reduce the size of military and number of military bases",
	"Issue - Reform policing and stop racial profiling",
	"Issue - Regulate CO2 emissions to combat global warming",
	"Issue - Strengthen border security to stop illegal immigration",
	"Issue - Strengthen gun control through commonsense restrictions",
	"Issue - Strengthen our military and national defense",
	"Issue - Toughen sentences and penalties for criminals"
)

# Labels plus the six estimate / se columns; iv_order is the label as a
# factor whose levels follow the row order above.
core_for_ggplot <- data.frame(labels=labels,full_matrix_clean)
core_for_ggplot$iv_order <- factor(core_for_ggplot$labels,
	levels=as.character(core_for_ggplot$labels))

# Labels of the baseline (zero) rows.
omits <- c(
	"Gender - Male",
	"Race - White",
	"Religion - None",
	"Occupation - Attorney",
	"Personality - Decent",
	"Endorsements - Major area newspapers",
	"Record - Help my constituents get the benefits they deserve",
	"Issue - Promote expanding free trade agreements"
)

# Group name = the label text up to two characters before its first '-'.
label.groups <- str_sub(labels,start=1,end=str_locate(labels,pattern='-')[,1]-2)
# Position of the column(s) whose name contains 'i_est'.
o.column <- grep(pattern='i_est',x=names(core_for_ggplot))

#core_for_ggplot=lableOrder(xmat=core_for_ggplot,labels,label.groups,omits,o.column)

#write.csv(core_for_ggplot,"csv/choice_primary_core_for_ggplot.csv")

###########################################
###Then, plot those models

#Doing the within-factor sorting in shitty hack
#core_for_ggplot <- read.csv("csv/choice_primary_core_for_ggplot.csv")
# Row order is taken from the iv_order column of a saved CSV (presumably
# written from the global, all-respondent model) so that figures line up.
o.order <- read.csv(file="data/core_for_ggplot_global.csv")$iv_order

core_for_ggplot <- reOrder(core_for_ggplot,o.order)



# augment w/ scores bivariate coefficient

# ideology
#primary_elec_dems_ideo <- lm(data=dems,dv_choice~
#pty + scores)
#primary_elec_dems_ideo_clse <- clse.f(dems,primary_elec_dems_ideo,dems$respondent)

#primary_elec_reps_ideo <- lm(data=reps,dv_choice~
#pty + scores)
#primary_elec_reps_ideo_clse <- clse.f(reps,primary_elec_reps_ideo,reps$respondent)

#primary_elec_inds_ideo <- lm(data=inds,dv_choice~
#pty + scores)
#primary_elec_inds_ideo_clse <- clse.f(inds,primary_elec_inds_ideo,inds$respondent)

#core_for_ggplot[,1]=as.character(core_for_ggplot[,1])
#core_for_ggplot[,2]=as.character(core_for_ggplot[,2])
#core_for_ggplot[,9]=as.character(core_for_ggplot[,9])
#core_for_ggplot=rbind(core_for_ggplot,c('i_ideology','Overall Ideology Rating',
	#primary_elec_dems_ideo_clse[2,1],primary_elec_dems_ideo_clse[2,2],
	#primary_elec_inds_ideo_clse[2,1],primary_elec_inds_ideo_clse[2,2],
	#primary_elec_reps_ideo_clse[2,1],primary_elec_reps_ideo_clse[2,2],
	#'Overall Ideology Rating'))

# Stack the party-specific estimates into long format for ggplot.
# Columns are read with explicit `$` access. No attach() is used, so nothing
# is left on the search path after this section (attached copies were
# previously never detached).
labels=core_for_ggplot$labels


ggplot_stacked <- data.frame(
labels=rep(as.character(labels),3),
PID=rep(c("Democrat","Independent","Republican"),each=length(labels)),
estimate=c(core_for_ggplot$d_estimate,core_for_ggplot$i_estimate,core_for_ggplot$r_estimate),
se=c(core_for_ggplot$d_se,core_for_ggplot$i_se,core_for_ggplot$r_se))

#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

# the Independent rows are not plotted
ggplot_stacked=ggplot_stacked[!grepl(ggplot_stacked$PID,pattern='Independent'),]
#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])


# defensive coercion in case the columns arrive as factor or character
ggplot_stacked$estimate=as.numeric(as.character(ggplot_stacked$estimate))
ggplot_stacked$se=as.numeric(as.character(ggplot_stacked$se))

#exclude overall ideology
#ggplot_stacked=ggplot_stacked[-c(grep(as.character(ggplot_stacked[,1]),pattern='Overall')),]
ggplot_stacked$labels=as.factor(as.character(ggplot_stacked$labels))
#write.csv(ggplot_stacked,"csv/choice_primary_ggplot_stacked.csv")
# labels in order of first appearance; used for the axis limits of the plot
labels=unique(ggplot_stacked$labels)


pd <- position_dodge(.5)
# 95% interval bounds (estimate -/+ 1.96 se), row-aligned with ggplot_stacked
minci <- (ggplot_stacked$estimate - (1.96*ggplot_stacked$se))
maxci <- (ggplot_stacked$estimate + (1.96*ggplot_stacked$se))

# legend entries carry the number of distinct respondents in each subset
ggplot_stacked$PID=gsub(ggplot_stacked$PID,pattern='Democrat',replacement=paste0('Democrat (N=',length(unique(dems$respondent)),')'))
ggplot_stacked$PID=gsub(ggplot_stacked$PID,pattern='Republican',replacement=paste0('Republican (N=',length(unique(reps$respondent)),')'))

# Coefficient plot (points with 1.96*se error bars, one colour per party)
# written to a PDF. The plot object is print()ed explicitly: a ggplot that is
# only auto-printed at top level is not drawn when this file is run through
# source(), which would leave the PDF without a page.
pdf("appendix/figures/choice_primary_HighKnow_sophisticated.pdf", width=9, height=14, pointsize=12)
choice_plot <- ggplot(ggplot_stacked,
    aes(x=labels, y=estimate, colour=PID)) +
    scale_color_manual(values = c("blue4","firebrick1")) +
    scale_x_discrete(limits=rev(labels)) +
    geom_hline(yintercept=0,size=0.5,color="gray80") +
    geom_errorbar(width=.1, aes(ymin=minci, ymax=maxci),position=pd) +
    geom_point(shape=21, size=2,position=pd) +
    ylim(-.81,.81) +
    theme_bw() +
    theme(axis.text.y = element_text(hjust=0,color="gray40")) +
    theme(legend.position=c(-0.3,0.935),legend.title=element_text()) +
 	theme(legend.text = element_text(size = 10)) +
    # dashed separators, presumably drawn between attribute groups
#    geom_vline(xintercept=1.5,size=0.5,linetype="dashed") +
    geom_vline(xintercept=20.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=25.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=38.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=46.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=56.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=60.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=63.5,size=0.5,linetype="dashed") +
    coord_flip() +
    labs(x = "Experimentally Manipulated Variable", y = "Change Pr(Vote Choice)")
print(choice_plot)
dev.off()

#write.csv(
#	list('n.rep'=nrow(reps),
# 		'n.rep.cluster'=length(unique(reps$respondent)),
# 		'n.rep'=nrow(dems),
# 		'n.rep.cluster'=length(unique(dems$respondent))
#	),
#	file='appendix/figures/choice_primary_H_sophisticated.csv'
#)

# keep the high-knowledge plotting data; it survives the workspace reset below
ggplot_stackedH=ggplot_stacked

###########################################
#  Ideology and Electability in Primary Voting Behavior
###########################################
#  - code by S. Goggin (Aug 10, 2017)
#  - edited by J. Henderson (Aug 16, 2017)

# validated primary/general voters

# Clear the workspace again, keeping only the high-knowledge plotting data
# and `dirs`.
rm(list = setdiff(ls(), c('ggplot_stackedH', 'dirs')))
library(ggplot2)
library(stringr)

# Reorder the rows of `xmat` within each label group by one estimate column.
#
# Arguments:
#   xmat          matrix or data frame with one row per label
#   labels        row labels, parallel to the rows of xmat
#   label.groups  group of each label, parallel to labels
#   omits         labels (exact strings) of the baseline levels; these rows
#                 are never moved
#   o.column      index or name of the single column of xmat to sort on
#
# Returns xmat with its rows permuted: in every group that has more than two
# rows, the non-omitted rows are rearranged in increasing order of the sort
# column, re-using the row slots they already occupied. Groups with one or two
# rows are returned unchanged.
lableOrder=function(xmat,labels,label.groups,omits,o.column){

	# Baseline rows are identified by exact label match. (Tagging them with an
	# 'omit' prefix and pattern-matching on 'omit' would also freeze any label
	# that merely contains that substring.)
	is_omitted=as.character(labels) %in% as.character(omits)

	# break groups into levels
	un_group=unique(label.groups)

	# this is the item to sort on, typically global or independent
	xm=xmat[,o.column]

	# vector which will contain row order (starts as the identity permutation)
	xo=seq_along(xm)

	# rearranging row order within level
	for(j in seq_along(un_group)){
		ix=which(label.groups==un_group[j])
		if(length(ix)>2){
			ix=ix[!is_omitted[ix]]
			# groups do not overlap, so xo[ix] is still ix at this point
			xo[ix]=ix[order(xm[ix])]
		}
	}
	return(xmat[xo,])
}

# Return the rows of `x` arranged in the order given by `o`.
#   x  data frame with an `iv_order` column holding one key per row
#   o  vector of keys in the desired row order
# Row i of the result is the (first) row of x whose iv_order equals o[i]; the
# result has exactly length(o) rows. Keys are compared as character strings,
# so factors with different level orderings are handled. Stops with an
# explicit message if a key in `o` does not occur in x$iv_order.
reOrder=function(x,o){
	ix=match(as.character(o),as.character(x$iv_order))
	if(anyNA(ix)){
		stop('reOrder: value(s) not found in x$iv_order: ',
			paste(unique(as.character(o)[is.na(ix)]),collapse=', '))
	}
	return(x[ix,])
}

# All relative paths below are resolved against `dirs`.
setwd(dirs)
load("data/vdl.cces_stacked_unmatched.Rdata")


###########################################
###First, need to stack based on candidates, not just candidate pairs (and also get text out for labels later)

#This has leaners as independents, which is incorrect
#cces_stacked$pid3clean <- ifelse(cces_stacked$pid3=="Democrat",-1,ifelse(cces_stacked$pid3=="Republican",1,0))

library(car)


# `data_matrix` is presumably supplied by this .Rdata file; the rest of the
# script works on it under the name `candidate_matrix`.
load('data/vdl.data_matrix_scored.Rdata')
candidate_matrix <- data_matrix
#read.csv("csv/candidate_matrix_scored.csv",header=T,stringsAsFactors=F)[,-c(1)]

# knowledge/sophistication/ideologue indices (`indices` is presumably
# supplied by this .Rdata file)
load('data/sophistication_indices.Rdata')

# NOTE(review): the levels here come from the L_ix2 / M_ix2 / H_ix2 columns,
# while the high-knowledge section earlier in this file uses know0to2 /
# know67 -- confirm that both define 'H' the same way.
L_ix2 <- indices$resp_id[indices$"L_ix2"]
M_ix2 <- indices$resp_id[indices$"M_ix2"]
H_ix2 <- indices$resp_id[indices$"H_ix2"]

# Rows start with no level (NA); levels are then assigned in the order
# L, M, H, so a later level overwrites an earlier one if an id is listed
# more than once. NA ids in the lists are ignored.
candidate_matrix$sophisticated_level <- NA
candidate_matrix$sophisticated_level[
	candidate_matrix$respondent %in% L_ix2[!is.na(L_ix2)]] <- 'L'
candidate_matrix$sophisticated_level[
	candidate_matrix$respondent %in% M_ix2[!is.na(M_ix2)]] <- 'M'
candidate_matrix$sophisticated_level[
	candidate_matrix$respondent %in% H_ix2[!is.na(H_ix2)]] <- 'H'


###########################################
###Then, produce models w/ Clustered SEs

#Function adapted from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
#
# Coefficient tests for a fitted model using a cluster-robust covariance.
#   dat      data frame the model was fitted on. Kept for backward
#            compatibility; it is not used for name lookup, so a column of
#            dat called `cluster` can no longer shadow the argument.
#   fm       fitted model object (the script passes lm fits)
#   cluster  vector of cluster identifiers, one per row of dat
# Returns the result of lmtest::coeftest(fm, <clustered covariance>).
# Stops if the sandwich or lmtest package is not installed (require() would
# only warn and then fail later on a missing function).
clse.f <- function(dat,fm, cluster){
 for(pkg in c('sandwich','lmtest')){
  if(!requireNamespace(pkg, quietly=TRUE)){
   stop("clse.f needs the '",pkg,"' package to be installed", call.=FALSE)
  }
 }

 # rows dropped by the model's na.action must be dropped from cluster too
 not <- attr(fm$model,"na.action")
 if( ! is.null(not)){
  cluster <- cluster[-not]
 }

 ef <- sandwich::estfun(fm)
 if(length(cluster)!=nrow(ef)){
  stop('clse.f: length of cluster (',length(cluster),
   ') does not match the number of observations used in the model (',
   nrow(ef),')', call.=FALSE)
 }

 M <- length(unique(cluster))   # number of clusters
 N <- length(cluster)           # number of observations
 K <- fm$rank                   # number of estimated parameters
 dfc <- (M/(M-1))*((N-1)/(N-K)) # finite-sample correction
 # sum the estimating functions within cluster, column by column
 uj <- apply(ef,2, function(x) tapply(x, cluster, sum))
 vcovCL <- dfc*sandwich::sandwich(fm, meat=crossprod(uj)/N)
 lmtest::coeftest(fm, vcovCL)
}

############################################################

# Knowledge level cross-tabulated against the valid_primary / valid_general
# columns. table() sorts the levels (H, L, M when all three occur); the row
# index c(1,3,2) puts them in H, M, L order.
know_by_primary <- table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),]
know_by_general <- table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),]

know_by_primary
know_by_general

# Knowledge shares within the second column of the valid_primary table
# (the numbers in the comments are presumably output recorded from a run)
know_by_primary[,2]/sum(know_by_primary[,2])
#H         M         L
#0.4748879 0.3721973 0.1529148

# ... and within its first column
know_by_primary[,1]/sum(know_by_primary[,1])
#H         M         L
#0.2746807 0.3492111 0.3761082

# Same two shares for the valid_general table
know_by_general[,2]/sum(know_by_general[,2])
#H         M         L
#0.4033544 0.3656860 0.2309596

know_by_general[,1]/sum(know_by_general[,1])
#H         M         L
#0.2705793 0.3475610 0.3818598

############################################################
############################################################
###Stratify by H,M,L overall

# Rows whose knowledge level is set and is not 'H' (rows with an NA level are
# dropped by subset()), restricted to conjoints 3 and 8.
vote <- subset(candidate_matrix,candidate_matrix$sophisticated_level!='H' & (candidate_matrix$conjoints==3|candidate_matrix$conjoints==8))
# pty: 0 for conjoint 3, 1 for conjoint 8
vote$pty <- ifelse(vote$conjoints==3,0,1)

reps <- subset(vote, vote$pid3==1)
dems <- subset(vote, vote$pid3==-1)
# NOTE(review): `inds` uses the same filter as `reps` (pid3==1), so the
# "independent" model below duplicates the Republican one. The Independent
# rows are removed before plotting, so the figure does not show them; confirm
# whether pid3==0 was intended or whether this is a deliberate placeholder.
inds <- subset(vote, vote$pid3==1)


#dv_choice is the vote choice variable
# pty is included for independents here, otherwise of course NAs out.

# One formula shared by all three models. The models are fitted with an
# explicit `data=` argument instead of attach()/detach(), so every variable
# (including `wt` and the clustering id) is taken from the intended subset and
# cannot be shadowed by a global object of the same name.
choice_formula <- dv_choice~
pty + g_female+
re_black+re_hispanic+
r_catholic+r_evangelical+r_protestant+
o_ceo+o_citycouncil+o_factoryforeman+o_farmer+o_usarmymajor+o_politicalstaffer+o_smallbizowner+o_stateleg+o_teacher+
p_compassionate+p_empathetic+p_inspiring+p_intelligent+p_knowledgeable+p_moral+p_strongleader+
e_business+e_christian+e_civilrights+e_energy+e_environment+e_guncontrol+e_gunrights+e_laborunions+e_reproductive+e_taxreform+e_teaparty+e_veterans+
rec_refuse+rec_secure+rec_stand+rec_work+
i_raisetaxes+i_cuttaxes+i_lgbt+i_marriage+i_drilling+i_need+i_govabuse+i_righttochoose+i_gunrights+i_unfairtrade+i_unbornlives+i_citizenship+i_reducemilitary+i_policing+i_co2emissions+i_bordersecurity+i_guncontrol+i_strengthenmilitary+i_criminals

# Weighted linear models (weights = 1/wt), each followed by coefficient
# tests with standard errors clustered on respondent.
primary_elec_reps <- lm(choice_formula, data=reps, weights=1/wt)
primary_elec_reps_clse <- clse.f(reps,primary_elec_reps,reps$respondent)

primary_elec_dems <- lm(choice_formula, data=dems, weights=1/wt)
primary_elec_dems_clse <- clse.f(dems,primary_elec_dems,dems$respondent)

primary_elec_inds <- lm(choice_formula, data=inds, weights=1/wt)
primary_elec_inds_clse <- clse.f(inds,primary_elec_inds,inds$respondent)

#Now mashing it all together for ggplot

# Point estimates, one column per model (dems, inds, reps), minus the pty row.
results_matrix <- cbind(coef(primary_elec_dems),coef(primary_elec_inds),coef(primary_elec_reps))
results_matrix <- results_matrix[-2,]

# Clustered standard errors (column 2 of each coeftest table), looked up by
# coefficient name; a coefficient with no row in the table gets NA.
coef_names <- rownames(results_matrix)
clustered_se <- function(ct){
	se <- ct[,2]
	unname(se[match(coef_names,names(se))])
}
results_matrix <- cbind(results_matrix,
	clustered_se(primary_elec_dems_clse),
	clustered_se(primary_elec_inds_clse),
	clustered_se(primary_elec_reps_clse))

# interleave: estimate, se for each of D, I, R
results_matrix <- results_matrix[,c(1,4,2,5,3,6)]
colnames(results_matrix) <- c("d_estimate","d_se","i_estimate","i_se","r_estimate","r_se")

# drop the intercept (row 1), leaving the 57 attribute-level coefficients
results_matrix <- results_matrix[2:58,]

# character matrix: coefficient name in column `var`, rows labelled 1..57
results_matrix_withnames <- cbind(var=rownames(results_matrix),results_matrix)
rownames(results_matrix_withnames) <- seq_len(57)

#results_matrix_withnames[is.na(results_matrix_withnames)]<- 0
##Now inserting rows for the omitted levels

#omitted (baseline) levels: male, white, no religion, attorney, decent, newspaper endorsements, record = helping constituents, issue = promoting free trade

# A baseline level enters the plot as a row of zeros: its name followed by
# zero for each of the six estimate / se columns.
baseline_row <- function(tag) c(tag,rep(0,6))

g_male       <- baseline_row("g_male")
re_white     <- baseline_row("re_white")
r_none       <- baseline_row("r_none")
o_attorney   <- baseline_row("o_attorney")
p_decent     <- baseline_row("p_decent")
e_newspapers <- baseline_row("e_newspapers")
rec_help     <- baseline_row("rec_help")
#i_raisetaxes <- c("i1_raisetaxes",rep(0,6))
i_freetrade  <- baseline_row("i1_freetrade")

# Put each baseline row directly above the estimated rows of its group.
full_matrix <- rbind(
	g_male,       results_matrix_withnames[1,,drop=FALSE],
	re_white,     results_matrix_withnames[2:3,],
	r_none,       results_matrix_withnames[4:6,],
	o_attorney,   results_matrix_withnames[7:15,],
	p_decent,     results_matrix_withnames[16:22,],
	e_newspapers, results_matrix_withnames[23:34,],
	rec_help,     results_matrix_withnames[35:38,],
	i_freetrade,  results_matrix_withnames[39:57,]
)

# Move the name column into the row names, then convert the remaining
# character columns back to numbers.
rownames(full_matrix) <- full_matrix[,1]
full_matrix <- full_matrix[,-1]
full_matrix_clean <- apply(full_matrix,2,as.numeric)
rownames(full_matrix_clean) <- rownames(full_matrix)

# Display label for every row of full_matrix_clean, in the same row order.
# Each label has the form "<group> - <level>".
labels <- c(
	"Gender - Male",
	"Gender - Female",
	"Race - White",
	"Race - Black",
	"Race - Hispanic",
	"Religion - None",
	"Religion - Catholic",
	"Religion - Evangelical Protestant",
	"Religion - Protestant",
	"Occupation - Attorney",
	"Occupation - CEO",
	"Occupation - City Council Member",
	"Occupation - Factory Foreman",
	"Occupation - Farmer",
	"Occupation - Former US Army Major",
	"Occupation - Political Staffer",
	"Occupation - Small Business Owner",
	"Occupation - State Legislator",
	"Occupation - Teacher",
	"Personality - Decent",
	"Personality - Compassionate",
	"Personality - Empathetic",
	"Personality - Inspiring",
	"Personality - Intelligent",
	"Personality - Knowledgeable",
	"Personality - Moral",
	"Personality - Strong Leader",
	"Endorsements - Major area newspapers",
	"Endorsements - Business groups",
	"Endorsements - Christian groups",
	"Endorsements - Civil rights groups",
	"Endorsements - Energy groups",
	"Endorsements - Environmental groups",
	"Endorsements - Gun control groups",
	"Endorsements - Gun rights groups",
	"Endorsements - Labor unions",
	"Endorsements - Reproductive rights groups",
	"Endorsements - Tax reform groups",
	"Endorsements - Tea Party groups",
	"Endorsements - Veterans groups",
	"Record - Help my constituents get the benefits they deserve",
	"Record - Refuse to compromise my principles even when it means taking on my party",
	"Record - Secure appointment to a powerful legislative committee",
	"Record - Stand with my party to do what's right",
	"Record - Work across the aisle to get things done",
	"Issue - Promote expanding free trade agreements",
	"Issue - Raise taxes on those making more than $250,000 a year",
	"Issue - Cut taxes on income and capital gains for all",
	"Issue - Defend the rights of LGBT individuals",
	"Issue - Defend traditional marriage and religious beliefs",
	"Issue - Expand domestic oil and gas production through drilling",
	"Issue - Expand government and unemployment assistance for those in need",
	"Issue - Prevent and prosecute abuse of government assistance programs",
	"Issue - Protect a woman's right to choose",
	"Issue - Protect gun owners' rights to defend themselves and others",
	"Issue - Protect jobs and industry from unfair foreign trade",
	"Issue - Protect the lives of the unborn",
	"Issue - Provide a path to citizenship for undocumented immigrants",
	"Issue - Reduce the size of military and number of military bases",
	"Issue - Reform policing and stop racial profiling",
	"Issue - Regulate CO2 emissions to combat global warming",
	"Issue - Strengthen border security to stop illegal immigration",
	"Issue - Strengthen gun control through commonsense restrictions",
	"Issue - Strengthen our military and national defense",
	"Issue - Toughen sentences and penalties for criminals"
)

# Labels plus the six estimate / se columns; iv_order is the label as a
# factor whose levels follow the row order above.
core_for_ggplot <- data.frame(labels=labels,full_matrix_clean)
core_for_ggplot$iv_order <- factor(core_for_ggplot$labels,
	levels=as.character(core_for_ggplot$labels))

# Labels of the baseline (zero) rows.
omits <- c(
	"Gender - Male",
	"Race - White",
	"Religion - None",
	"Occupation - Attorney",
	"Personality - Decent",
	"Endorsements - Major area newspapers",
	"Record - Help my constituents get the benefits they deserve",
	"Issue - Promote expanding free trade agreements"
)

# Group name = the label text up to two characters before its first '-'.
label.groups <- str_sub(labels,start=1,end=str_locate(labels,pattern='-')[,1]-2)
# Position of the column(s) whose name contains 'i_est'.
o.column <- grep(pattern='i_est',x=names(core_for_ggplot))

#core_for_ggplot=lableOrder(xmat=core_for_ggplot,labels,label.groups,omits,o.column)

#write.csv(core_for_ggplot,"csv/choice_primary_core_for_ggplot.csv")

###########################################
###Then, plot those models

#Doing the within-factor sorting in shitty hack
#core_for_ggplot <- read.csv("csv/choice_primary_core_for_ggplot.csv")
# Row order is taken from the iv_order column of a saved CSV (presumably
# written from the global, all-respondent model) so that figures line up.
o.order <- read.csv(file="data/core_for_ggplot_global.csv")$iv_order

core_for_ggplot <- reOrder(core_for_ggplot,o.order)



# augment w/ scores bivariate coefficient

# ideology
#primary_elec_dems_ideo <- lm(data=dems,dv_choice~
#pty + scores)
#primary_elec_dems_ideo_clse <- clse.f(dems,primary_elec_dems_ideo,dems$respondent)

#primary_elec_reps_ideo <- lm(data=reps,dv_choice~
#pty + scores)
#primary_elec_reps_ideo_clse <- clse.f(reps,primary_elec_reps_ideo,reps$respondent)

#primary_elec_inds_ideo <- lm(data=inds,dv_choice~
#pty + scores)
#primary_elec_inds_ideo_clse <- clse.f(inds,primary_elec_inds_ideo,inds$respondent)

#core_for_ggplot[,1]=as.character(core_for_ggplot[,1])
#core_for_ggplot[,2]=as.character(core_for_ggplot[,2])
#core_for_ggplot[,9]=as.character(core_for_ggplot[,9])
#core_for_ggplot=rbind(core_for_ggplot,c('i_ideology','Overall Ideology Rating',
	#primary_elec_dems_ideo_clse[2,1],primary_elec_dems_ideo_clse[2,2],
	#primary_elec_inds_ideo_clse[2,1],primary_elec_inds_ideo_clse[2,2],
	#primary_elec_reps_ideo_clse[2,1],primary_elec_reps_ideo_clse[2,2],
	#'Overall Ideology Rating'))

# Stack the Democrat / Independent / Republican estimates into one long data
# frame for plotting.
#
# Columns are read explicitly from core_for_ggplot instead of through
# attach(). The earlier version attached ggplot_stacked twice and never
# detached it, so stale copies of `labels`, `PID`, `estimate` and `se`
# stayed on the search path for everything that ran afterwards.
labels <- core_for_ggplot$labels
party_names <- c("Democrat", "Independent", "Republican")

ggplot_stacked <- data.frame(
  labels = rep(as.character(labels), times = length(party_names)),
  PID = rep(party_names, each = length(labels)),
  estimate = c(core_for_ggplot$d_estimate,
               core_for_ggplot$i_estimate,
               core_for_ggplot$r_estimate),
  se = c(core_for_ggplot$d_se,
         core_for_ggplot$i_se,
         core_for_ggplot$r_se)
)

#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

# Independents are dropped here; only the two parties are plotted.
ggplot_stacked <- ggplot_stacked[!grepl("Independent", ggplot_stacked$PID), ]
#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

ggplot_stacked$estimate <- as.numeric(as.character(ggplot_stacked$estimate))
ggplot_stacked$se <- as.numeric(as.character(ggplot_stacked$se))

#exclude overall ideology
#ggplot_stacked=ggplot_stacked[-c(grep(as.character(ggplot_stacked[,1]),pattern='Overall')),]
ggplot_stacked$labels <- as.factor(as.character(ggplot_stacked$labels))
#write.csv(ggplot_stacked,"csv/choice_primary_ggplot_stacked.csv")

# Labels in order of first appearance; the plot reverses this for its axis.
labels <- unique(ggplot_stacked$labels)


pd <- position_dodge(.5)
# 95% interval bounds, kept as free-standing vectors aligned with the rows
# of ggplot_stacked (the plot refers to them by name).
minci <- (ggplot_stacked$estimate - (1.96*ggplot_stacked$se))
maxci <- (ggplot_stacked$estimate + (1.96*ggplot_stacked$se))

# Append the number of distinct respondents in each party to the legend text.
ggplot_stacked$PID <- gsub(
  "Democrat",
  paste0("Democrat (N=", length(unique(dems$respondent)), ")"),
  ggplot_stacked$PID
)
ggplot_stacked$PID <- gsub(
  "Republican",
  paste0("Republican (N=", length(unique(reps$respondent)), ")"),
  ggplot_stacked$PID
)

# Write the coefficient plot (point estimate with +/- 1.96*SE bars, one colour
# per party) to PDF.
#
# The plot is wrapped in print(): a ggplot object at top level is only drawn
# by auto-printing, which does not happen when this file is run through
# source(), and the PDF would then be written empty.
pdf("appendix/figures/choice_primary_notHighKnow_sophisticated.pdf", width=9, height=14, pointsize=12)
print(
ggplot(ggplot_stacked,
    aes(x=labels, y=estimate, colour=PID)) +
    scale_color_manual(values = c("blue4","firebrick1")) +
    scale_x_discrete(limits=rev(labels)) +
    geom_hline(yintercept=0,size=0.5,color="gray80") +
    geom_errorbar(width=.1, aes(ymin=minci, ymax=maxci),position=pd) +
    geom_point(shape=21, size=2,position=pd) +
    ylim(-.81,.81) +
    theme_bw() +
    theme(axis.text.y = element_text(hjust=0,color="gray40")) +
    theme(legend.position=c(-0.3,0.935),legend.title=element_text()) +
 	theme(legend.text = element_text(size = 10)) +
    # dashed lines at fixed positions along the label axis; presumably the
    # boundaries between attribute groups -- confirm against the ordering file
#    geom_vline(xintercept=1.5,size=0.5,linetype="dashed") +
    geom_vline(xintercept=20.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=25.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=38.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=46.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=56.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=60.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=63.5,size=0.5,linetype="dashed") +
    coord_flip() +
    labs(x = "Experimentally Manipulated Variable", y = "Change Pr(Vote Choice)")
)
dev.off()

#write.csv(
#	list('n.rep'=nrow(reps),
# 		'n.rep.cluster'=length(unique(reps$respondent)),
# 		'n.rep'=nrow(dems),
# 		'n.rep.cluster'=length(unique(dems$respondent))
#	),
#	file='appendix/figures/choice_primary_notH_sophisticated.csv'
#)



###########################################
#  Ideology and Electability in Primary Voting Behavior
###########################################
#  - code by S. Goggin (Aug 10, 2017)
#  - edited by J. Henderson (Aug 16, 2017)

# validated primary/general voters

# Start the section from a clean workspace: remove every global object except
# `dirs` and `ggplot_stackedH`.
rm(list=setdiff(ls(),c('ggplot_stackedH','dirs')))
library(ggplot2)
library(stringr)

# Reorder the rows of xmat so that, within each label group, rows are sorted
# by the values in one column.
#
# Arguments:
#   xmat         - table of estimates, one row per label
#   labels       - label of each row of xmat
#   label.groups - group name of each row of xmat
#   omits        - labels of the omitted (reference) levels; those rows keep
#                  their position
#   o.column     - index of the column of xmat to sort on
#
# Returns xmat with its rows permuted. Groups with two or fewer rows are left
# as they are. An empty xmat comes back empty (the previous 1:length() index
# produced a phantom NA row), and omitted rows are identified by exact label
# match, so a label that merely contains the text "omit" is still sorted.
lableOrder <- function(xmat, labels, label.groups, omits, o.column) {

  # rows that must stay where they are
  is_omitted <- labels %in% omits

  # values to sort on, and the row order to be filled in
  xm <- xmat[, o.column]
  xo <- seq_along(xm)

  # rearrange the row order inside each group
  for (grp in unique(label.groups)) {
    ix <- which(label.groups == grp)
    if (length(ix) > 2) {
      ix <- ix[!is_omitted[ix]]
      xo[ix] <- xo[ix][order(xm[ix])]
    }
  }
  xmat[xo, ]
}

# Return the rows of x arranged in the order given by o.
#
# Arguments:
#   x - data frame with an `iv_order` column identifying each row
#   o - vector of iv_order values in the desired row order
#
# Each element of o is matched (as text) against x$iv_order and the first
# matching row is taken. The result has one row per element of o: an empty o
# gives an empty result, and an o shorter than x no longer pads the result
# with all-NA rows. A value of o with no matching row stops with a message
# naming the value.
reOrder <- function(x, o) {
  wanted <- as.character(o)
  ix <- match(wanted, as.character(x$iv_order))
  if (anyNA(ix)) {
    stop("reOrder: no row of x has iv_order equal to: ",
         paste(unique(wanted[is.na(ix)]), collapse = "; "),
         call. = FALSE)
  }
  x[ix, ]
}

setwd(dirs)
load("data/vdl.cces_stacked_unmatched.Rdata")


###########################################
### Work at the candidate level (not candidate pairs); label text is added later

# Coding leaners as independents would be incorrect, hence this stays disabled:
#cces_stacked$pid3clean <- ifelse(cces_stacked$pid3=="Democrat",-1,ifelse(cces_stacked$pid3=="Republican",1,0))

library(car)


load('data/vdl.data_matrix_scored.Rdata')
candidate_matrix <- data_matrix
#read.csv("csv/candidate_matrix_scored.csv",header=T,stringsAsFactors=F)[,-c(1)]

# knowledge/sophistication/ideologue indices
load('data/sophistication_indices.Rdata')

# respondent ids selected by each of the L / M / H index columns
L_ix2 <- indices$resp_id[indices$L_ix2]
M_ix2 <- indices$resp_id[indices$M_ix2]
H_ix2 <- indices$resp_id[indices$H_ix2]

# Tag every candidate row with its respondent's level. Levels are applied in
# the order L, M, H, so a respondent listed under several levels ends up with
# the last one applied.
candidate_matrix$sophisticated_level <- NA
level_ids <- list(L = L_ix2, M = M_ix2, H = H_ix2)
for (lev in names(level_ids)) {
  ids <- level_ids[[lev]]
  # missing ids never match a respondent
  in_level <- candidate_matrix$respondent %in% ids[!is.na(ids)]
  candidate_matrix$sophisticated_level[in_level] <- lev
}


###########################################
###Then, produce models w/ Clustered SEs

#Function from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
# Coefficient tests for a fitted model using a cluster-adjusted covariance.
#
# Arguments:
#   dat     - data the model was fitted to
#   fm      - the fitted model
#   cluster - cluster identifier for each row of dat
#
# Rows recorded in the model frame's na.action are removed from dat and
# cluster first, so both line up with the observations used in the fit.
# Returns the coeftest() table for fm under the adjusted covariance.
clse.f <- function(dat,fm, cluster){
  require(sandwich)
  require(lmtest)

  dropped_rows <- attr(fm$model,"na.action")
  if(!is.null(dropped_rows)){
    cluster <- cluster[-dropped_rows]
    dat <- dat[-dropped_rows,]
  }

  with(dat,{
    n_clusters <- length(unique(cluster))
    n_obs <- length(cluster)
    n_coef <- fm$rank

    # estimating-function contributions summed within cluster
    cluster_scores <- apply(estfun(fm),2, function(col) tapply(col, cluster, sum))

    # finite-sample correction applied to the sandwich estimate
    correction <- (n_clusters/(n_clusters-1))*((n_obs-1)/(n_obs-n_coef))
    adjusted_vcov <- correction*sandwich(fm, meat=crossprod(cluster_scores)/n_obs)

    coeftest(fm, adjusted_vcov)
  })
}

############################################################

# Cross-tabulate sophistication level against the valid_primary and
# valid_general indicators. Rows are picked in the order c(1,3,2); going by
# the recorded output below this yields H, M, L.
tab_primary <- table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),]
tab_general <- table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),]

tab_primary
tab_general

# share of each level within the second column of the primary table
tab_primary[,2]/sum(tab_primary[,2])
#H         M         L
#0.4748879 0.3721973 0.1529148

# share of each level within the first column of the primary table
tab_primary[,1]/sum(tab_primary[,1])
#H         M         L
#0.2746807 0.3492111 0.3761082

# same two breakdowns for the general table
tab_general[,2]/sum(tab_general[,2])
#H         M         L
#0.4033544 0.3656860 0.2309596

tab_general[,1]/sum(tab_general[,1])
#H         M         L
#0.2705793 0.3475610 0.3818598

############################################################
############################################################
###Stratify by H,M,L overall

# Keep the low-sophistication ('L') rows from conjoints 3 and 8; pty marks
# which of the two conjoints a row belongs to (0 for conjoint 3, 1 otherwise).
vote <- subset(candidate_matrix,candidate_matrix$sophisticated_level=='L' & (candidate_matrix$conjoints==3|candidate_matrix$conjoints==8))
vote$pty <- ifelse(vote$conjoints==3,0,1)

# Split by party id: 1 = Republican, -1 = Democrat, and 0 = independent
# (following the -1/0/1 coding sketched in the commented-out pid3clean line
# above). The independent subset previously reused the Republican filter
# (pid3==1), making the "independent" model a second copy of the Republican
# one. Independents are dropped again before plotting.
reps <- subset(vote, vote$pid3==1)
dems <- subset(vote, vote$pid3==-1)
inds <- subset(vote, vote$pid3==0)


# Linear probability models of candidate choice, fitted separately for
# Republicans, Democrats and independents with the same specification.
#
# dv_choice is the vote choice variable.
# pty is kept in the formula for the independents' model; its coefficient row
# is removed again when the results are combined (for partisans it presumably
# does not vary and comes out NA).
#
# The formula is written once and each model is fitted with data= rather than
# attach()/detach(), so model variables always come from the subset being
# fitted and cannot be shadowed by a same-named global object.
primary_formula <- dv_choice~
pty + g_female+
re_black+re_hispanic+
r_catholic+r_evangelical+r_protestant+
o_ceo+o_citycouncil+o_factoryforeman+o_farmer+o_usarmymajor+o_politicalstaffer+o_smallbizowner+o_stateleg+o_teacher+
p_compassionate+p_empathetic+p_inspiring+p_intelligent+p_knowledgeable+p_moral+p_strongleader+
e_business+e_christian+e_civilrights+e_energy+e_environment+e_guncontrol+e_gunrights+e_laborunions+e_reproductive+e_taxreform+e_teaparty+e_veterans+
rec_refuse+rec_secure+rec_stand+rec_work+
i_raisetaxes+i_cuttaxes+i_lgbt+i_marriage+i_drilling+i_need+i_govabuse+i_righttochoose+i_gunrights+i_unfairtrade+i_unbornlives+i_citizenship+i_reducemilitary+i_policing+i_co2emissions+i_bordersecurity+i_guncontrol+i_strengthenmilitary+i_criminals

# Each fit is weighted by 1/wt and then tested with standard errors clustered
# on respondent.
primary_elec_reps <- lm(primary_formula, data=reps, weights=1/wt)
primary_elec_reps_clse <- clse.f(reps,primary_elec_reps,reps$respondent)

primary_elec_dems <- lm(primary_formula, data=dems, weights=1/wt)
primary_elec_dems_clse <- clse.f(dems,primary_elec_dems,dems$respondent)

primary_elec_inds <- lm(primary_formula, data=inds, weights=1/wt)
primary_elec_inds_clse <- clse.f(inds,primary_elec_inds,inds$respondent)

#Now mashing it all together for ggplot

# Point estimates side by side (Democrats, independents, Republicans), with
# the pty coefficient (row 2) removed.
coef_by_party <- cbind(primary_elec_dems$coefficients,
                       primary_elec_inds$coefficients,
                       primary_elec_reps$coefficients)[-c(2),]
coef_terms <- rownames(coef_by_party)

# Clustered standard errors, looked up by term name. A term missing from a
# coefficient-test table gets NA.
clse_dems <- primary_elec_dems_clse[,2]
clse_inds <- primary_elec_inds_clse[,2]
clse_reps <- primary_elec_reps_clse[,2]

results_matrix <- cbind(
	coef_by_party[,1], unname(clse_dems[match(coef_terms,names(clse_dems))]),
	coef_by_party[,2], unname(clse_inds[match(coef_terms,names(clse_inds))]),
	coef_by_party[,3], unname(clse_reps[match(coef_terms,names(clse_reps))])
)
rownames(results_matrix) <- coef_terms
colnames(results_matrix) <- c("d_estimate","d_se","i_estimate","i_se","r_estimate","r_se")

# Drop the first row (the intercept in an lm fit), leaving the 57 attribute
# terms, then carry the term names along as a leading column with plain
# numeric row names.
results_matrix <- results_matrix[2:58,]
results_matrix_withnames <- cbind(var=rownames(results_matrix),results_matrix)
rownames(results_matrix_withnames) <- seq_len(57)

#results_matrix_withnames[is.na(results_matrix_withnames)]<- 0
##Now inserting rows for the omitted levels

# Omitted (reference) levels that get a placeholder row below: male, white,
# no religion, attorney, decent, newspaper endorsements, record = helping
# constituents, issue = free trade. (Raising taxes is an estimated term in
# the models; its placeholder is commented out.)

# Each placeholder is a name followed by six zeros, one per estimate/SE column.
g_male <- c("g_male",rep(0,6))
re_white <- c("re_white",rep(0,6))
r_none <- c("r_none",rep(0,6))
o_attorney <- c("o_attorney",rep(0,6))
p_decent <- c("p_decent",rep(0,6))
e_newspapers <- c("e_newspapers",rep(0,6))
rec_help <- c("rec_help",rep(0,6))
#i_raisetaxes <- c("i1_raisetaxes",rep(0,6))
i_freetrade <- c("i1_freetrade",rep(0,6))

# Interleave: each placeholder row goes directly in front of the estimated
# rows of its attribute. The row ranges are hard-coded positions within the
# 57-row results_matrix_withnames.
full_matrix <- rbind(
g_male,
results_matrix_withnames[1,],
re_white,
results_matrix_withnames[2:3,],
r_none,
results_matrix_withnames[4:6,],
o_attorney,
results_matrix_withnames[7:15,],
p_decent,
results_matrix_withnames[16:22,],
e_newspapers,
results_matrix_withnames[23:34,],
rec_help,
results_matrix_withnames[35:38,],
i_freetrade,
results_matrix_withnames[39:57,]
)

# Move the name column into the row names, then convert the remaining
# (character) columns back to numbers; apply() drops the row names, so they
# are restored afterwards.
rownames(full_matrix) <- full_matrix[,1]
full_matrix <- full_matrix[,2:ncol(full_matrix)]
full_matrix_clean <- apply(full_matrix,2,as.numeric)
rownames(full_matrix_clean) <- rownames(full_matrix)

# Display labels in the form "<attribute> - <level>". They are paired with
# the rows of full_matrix_clean by position when core_for_ggplot is built, so
# the order here has to follow the row order of that matrix.
labels <- c(
"Gender - Male",
"Gender - Female",
"Race - White",
"Race - Black",
"Race - Hispanic",
"Religion - None",
"Religion - Catholic",
"Religion - Evangelical Protestant",
"Religion - Protestant",
"Occupation - Attorney",
"Occupation - CEO",
"Occupation - City Council Member",
"Occupation - Factory Foreman",
"Occupation - Farmer",
"Occupation - Former US Army Major",
"Occupation - Political Staffer",
"Occupation - Small Business Owner",
"Occupation - State Legislator",
"Occupation - Teacher",
"Personality - Decent",
"Personality - Compassionate",
"Personality - Empathetic",
"Personality - Inspiring",
"Personality - Intelligent",
"Personality - Knowledgeable",
"Personality - Moral",
"Personality - Strong Leader",
"Endorsements - Major area newspapers",
"Endorsements - Business groups",
"Endorsements - Christian groups",
"Endorsements - Civil rights groups",
"Endorsements - Energy groups",
"Endorsements - Environmental groups",
"Endorsements - Gun control groups",
"Endorsements - Gun rights groups",
"Endorsements - Labor unions",
"Endorsements - Reproductive rights groups",
"Endorsements - Tax reform groups",
"Endorsements - Tea Party groups",
"Endorsements - Veterans groups",
"Record - Help my constituents get the benefits they deserve",
"Record - Refuse to compromise my principles even when it means taking on my party",
"Record - Secure appointment to a powerful legislative committee",
"Record - Stand with my party to do what's right",
"Record - Work across the aisle to get things done",
"Issue - Promote expanding free trade agreements",
"Issue - Raise taxes on those making more than $250,000 a year",
"Issue - Cut taxes on income and capital gains for all",
"Issue - Defend the rights of LGBT individuals",
"Issue - Defend traditional marriage and religious beliefs",
"Issue - Expand domestic oil and gas production through drilling",
"Issue - Expand government and unemployment assistance for those in need",
"Issue - Prevent and prosecute abuse of government assistance programs",
"Issue - Protect a woman's right to choose",
"Issue - Protect gun owners' rights to defend themselves and others",
"Issue - Protect jobs and industry from unfair foreign trade",
"Issue - Protect the lives of the unborn",
"Issue - Provide a path to citizenship for undocumented immigrants",
"Issue - Reduce the size of military and number of military bases",
"Issue - Reform policing and stop racial profiling",
"Issue - Regulate CO2 emissions to combat global warming",
"Issue - Strengthen border security to stop illegal immigration",
"Issue - Strengthen gun control through commonsense restrictions",
"Issue - Strengthen our military and national defense",
"Issue - Toughen sentences and penalties for criminals")

# Pair every display label with its row of estimates. iv_order keeps the
# labels as a factor whose levels follow the current row order; reOrder()
# matches on it below.
core_for_ggplot <- data.frame(labels = labels, full_matrix_clean)
core_for_ggplot$iv_order <- factor(
  core_for_ggplot$labels,
  levels = as.character(core_for_ggplot$labels)
)

# Labels of the omitted (reference) level of each attribute.
omits <- c(
  "Gender - Male",
  "Race - White",
  "Religion - None",
  "Occupation - Attorney",
  "Personality - Decent",
  "Endorsements - Major area newspapers",
  "Record - Help my constituents get the benefits they deserve",
  "Issue - Promote expanding free trade agreements"
)

# Attribute group = the text in front of the " - " separator of each label.
label.groups <- str_sub(labels, 1, str_locate(labels, pattern = '-')[, 1] - 2)
# Column(s) whose name contains 'i_est' (the independent estimate).
o.column <- grep('i_est', names(core_for_ggplot))

# The in-script sort is disabled; row order comes from the saved file below.
#core_for_ggplot=lableOrder(xmat=core_for_ggplot,labels,label.groups,omits,o.column)

#write.csv(core_for_ggplot,"csv/choice_primary_core_for_ggplot.csv")

###########################################
###Then, plot those models

# Within-attribute ordering is taken from a previously saved ordering file
# instead of being recomputed here.
#core_for_ggplot <- read.csv("csv/choice_primary_core_for_ggplot.csv")
o.order <- read.csv("data/core_for_ggplot_global.csv")$iv_order

core_for_ggplot <- reOrder(x = core_for_ggplot, o = o.order)



# augment w/ scores bivariate coefficient

# ideology
#primary_elec_dems_ideo <- lm(data=dems,dv_choice~
#pty + scores)
#primary_elec_dems_ideo_clse <- clse.f(dems,primary_elec_dems_ideo,dems$respondent)

#primary_elec_reps_ideo <- lm(data=reps,dv_choice~
#pty + scores)
#primary_elec_reps_ideo_clse <- clse.f(reps,primary_elec_reps_ideo,reps$respondent)

#primary_elec_inds_ideo <- lm(data=inds,dv_choice~
#pty + scores)
#primary_elec_inds_ideo_clse <- clse.f(inds,primary_elec_inds_ideo,inds$respondent)

#core_for_ggplot[,1]=as.character(core_for_ggplot[,1])
#core_for_ggplot[,2]=as.character(core_for_ggplot[,2])
#core_for_ggplot[,9]=as.character(core_for_ggplot[,9])
#core_for_ggplot=rbind(core_for_ggplot,c('i_ideology','Overall Ideology Rating',
	#primary_elec_dems_ideo_clse[2,1],primary_elec_dems_ideo_clse[2,2],
	#primary_elec_inds_ideo_clse[2,1],primary_elec_inds_ideo_clse[2,2],
	#primary_elec_reps_ideo_clse[2,1],primary_elec_reps_ideo_clse[2,2],
	#'Overall Ideology Rating'))

# Stack the Democrat / Independent / Republican estimates into one long data
# frame for plotting.
#
# Columns are read explicitly from core_for_ggplot instead of through
# attach(). The earlier version attached ggplot_stacked twice and never
# detached it, so stale copies of `labels`, `PID`, `estimate` and `se`
# stayed on the search path for everything that ran afterwards.
labels <- core_for_ggplot$labels
party_names <- c("Democrat", "Independent", "Republican")

ggplot_stacked <- data.frame(
  labels = rep(as.character(labels), times = length(party_names)),
  PID = rep(party_names, each = length(labels)),
  estimate = c(core_for_ggplot$d_estimate,
               core_for_ggplot$i_estimate,
               core_for_ggplot$r_estimate),
  se = c(core_for_ggplot$d_se,
         core_for_ggplot$i_se,
         core_for_ggplot$r_se)
)

#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

# Independents are dropped here; only the two parties are plotted.
ggplot_stacked <- ggplot_stacked[!grepl("Independent", ggplot_stacked$PID), ]
#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

ggplot_stacked$estimate <- as.numeric(as.character(ggplot_stacked$estimate))
ggplot_stacked$se <- as.numeric(as.character(ggplot_stacked$se))

#exclude overall ideology
#ggplot_stacked=ggplot_stacked[-c(grep(as.character(ggplot_stacked[,1]),pattern='Overall')),]
ggplot_stacked$labels <- as.factor(as.character(ggplot_stacked$labels))
#write.csv(ggplot_stacked,"csv/choice_primary_ggplot_stacked.csv")

# Labels in order of first appearance; the plot reverses this for its axis.
labels <- unique(ggplot_stacked$labels)


pd <- position_dodge(.5)
# 95% interval bounds, kept as free-standing vectors aligned with the rows
# of ggplot_stacked (the plot refers to them by name).
minci <- (ggplot_stacked$estimate - (1.96*ggplot_stacked$se))
maxci <- (ggplot_stacked$estimate + (1.96*ggplot_stacked$se))

# Append the number of distinct respondents in each party to the legend text.
ggplot_stacked$PID <- gsub(
  "Democrat",
  paste0("Democrat (N=", length(unique(dems$respondent)), ")"),
  ggplot_stacked$PID
)
ggplot_stacked$PID <- gsub(
  "Republican",
  paste0("Republican (N=", length(unique(reps$respondent)), ")"),
  ggplot_stacked$PID
)

# Write the coefficient plot (point estimate with +/- 1.96*SE bars, one colour
# per party) to PDF.
#
# The plot is wrapped in print(): a ggplot object at top level is only drawn
# by auto-printing, which does not happen when this file is run through
# source(), and the PDF would then be written empty.
pdf("appendix/figures/choice_primary_LowKnow_sophisticated.pdf", width=9, height=14, pointsize=12)
print(
ggplot(ggplot_stacked,
    aes(x=labels, y=estimate, colour=PID)) +
    scale_color_manual(values = c("blue4","firebrick1")) +
    scale_x_discrete(limits=rev(labels)) +
    geom_hline(yintercept=0,size=0.5,color="gray80") +
    geom_errorbar(width=.1, aes(ymin=minci, ymax=maxci),position=pd) +
    geom_point(shape=21, size=2,position=pd) +
    ylim(-.81,.81) +
    theme_bw() +
    theme(axis.text.y = element_text(hjust=0,color="gray40")) +
    theme(legend.position=c(-0.3,0.935),legend.title=element_text()) +
 	theme(legend.text = element_text(size = 10)) +
    # dashed lines at fixed positions along the label axis; presumably the
    # boundaries between attribute groups -- confirm against the ordering file
#    geom_vline(xintercept=1.5,size=0.5,linetype="dashed") +
    geom_vline(xintercept=20.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=25.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=38.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=46.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=56.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=60.5,size=0.5,linetype="dashed") +
	geom_vline(xintercept=63.5,size=0.5,linetype="dashed") +
    coord_flip() +
    labs(x = "Experimentally Manipulated Variable", y = "Change Pr(Vote Choice)")
)
dev.off()

#write.csv(
#	list('n.rep'=nrow(reps),
 #		'n.rep.cluster'=length(unique(reps$respondent)),
 	#	'n.rep'=nrow(dems),
 #		'n.rep.cluster'=length(unique(dems$respondent))
#	),
#	file='appendix/figures/choice_primary_L_sophisticated.csv'
#)


# Keep this section's stacked estimates under their own name; the rm() call
# that opens the next section spares ggplot_stackedL.
ggplot_stackedL=ggplot_stacked

###########################################
#  Ideology and Electability in Primary Voting Behavior
###########################################
#  - code by S. Goggin (Aug 10, 2017)
#  - edited by J. Henderson (Aug 16, 2017)

# validated primary/general voters

# Start the section from a clean workspace: remove every global object except
# `dirs`, `ggplot_stackedH` and `ggplot_stackedL`.
rm(list=setdiff(ls(),c('ggplot_stackedH','ggplot_stackedL','dirs')))

library(ggplot2)
library(stringr)

# Reorder the rows of xmat so that, within each label group, rows are sorted
# by the values in one column.
#
# Arguments:
#   xmat         - table of estimates, one row per label
#   labels       - label of each row of xmat
#   label.groups - group name of each row of xmat
#   omits        - labels of the omitted (reference) levels; those rows keep
#                  their position
#   o.column     - index of the column of xmat to sort on
#
# Returns xmat with its rows permuted. Groups with two or fewer rows are left
# as they are. An empty xmat comes back empty (the previous 1:length() index
# produced a phantom NA row), and omitted rows are identified by exact label
# match, so a label that merely contains the text "omit" is still sorted.
lableOrder <- function(xmat, labels, label.groups, omits, o.column) {

  # rows that must stay where they are
  is_omitted <- labels %in% omits

  # values to sort on, and the row order to be filled in
  xm <- xmat[, o.column]
  xo <- seq_along(xm)

  # rearrange the row order inside each group
  for (grp in unique(label.groups)) {
    ix <- which(label.groups == grp)
    if (length(ix) > 2) {
      ix <- ix[!is_omitted[ix]]
      xo[ix] <- xo[ix][order(xm[ix])]
    }
  }
  xmat[xo, ]
}

# Return the rows of x arranged in the order given by o.
#
# Arguments:
#   x - data frame with an `iv_order` column identifying each row
#   o - vector of iv_order values in the desired row order
#
# Each element of o is matched (as text) against x$iv_order and the first
# matching row is taken. The result has one row per element of o: an empty o
# gives an empty result, and an o shorter than x no longer pads the result
# with all-NA rows. A value of o with no matching row stops with a message
# naming the value.
reOrder <- function(x, o) {
  wanted <- as.character(o)
  ix <- match(wanted, as.character(x$iv_order))
  if (anyNA(ix)) {
    stop("reOrder: no row of x has iv_order equal to: ",
         paste(unique(wanted[is.na(ix)]), collapse = "; "),
         call. = FALSE)
  }
  x[ix, ]
}

setwd(dirs)
load("data/vdl.cces_stacked_unmatched.Rdata")


###########################################
### Work at the candidate level (not candidate pairs); label text is added later

# Coding leaners as independents would be incorrect, hence this stays disabled:
#cces_stacked$pid3clean <- ifelse(cces_stacked$pid3=="Democrat",-1,ifelse(cces_stacked$pid3=="Republican",1,0))

library(car)


load('data/vdl.data_matrix_scored.Rdata')
candidate_matrix <- data_matrix
#read.csv("csv/candidate_matrix_scored.csv",header=T,stringsAsFactors=F)[,-c(1)]

# knowledge/sophistication/ideologue indices
load('data/sophistication_indices.Rdata')

# respondent ids selected by each of the L / M / H index columns
L_ix2 <- indices$resp_id[indices$L_ix2]
M_ix2 <- indices$resp_id[indices$M_ix2]
H_ix2 <- indices$resp_id[indices$H_ix2]

# Tag every candidate row with its respondent's level. Levels are applied in
# the order L, M, H, so a respondent listed under several levels ends up with
# the last one applied.
candidate_matrix$sophisticated_level <- NA
level_ids <- list(L = L_ix2, M = M_ix2, H = H_ix2)
for (lev in names(level_ids)) {
  ids <- level_ids[[lev]]
  # missing ids never match a respondent
  in_level <- candidate_matrix$respondent %in% ids[!is.na(ids)]
  candidate_matrix$sophisticated_level[in_level] <- lev
}


###########################################
###Then, produce models w/ Clustered SEs

#Function from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
# Coefficient tests for a fitted model using a cluster-adjusted covariance.
#
# Arguments:
#   dat     - data the model was fitted to
#   fm      - the fitted model
#   cluster - cluster identifier for each row of dat
#
# Rows recorded in the model frame's na.action are removed from dat and
# cluster first, so both line up with the observations used in the fit.
# Returns the coeftest() table for fm under the adjusted covariance.
clse.f <- function(dat,fm, cluster){
  require(sandwich)
  require(lmtest)

  dropped_rows <- attr(fm$model,"na.action")
  if(!is.null(dropped_rows)){
    cluster <- cluster[-dropped_rows]
    dat <- dat[-dropped_rows,]
  }

  with(dat,{
    n_clusters <- length(unique(cluster))
    n_obs <- length(cluster)
    n_coef <- fm$rank

    # estimating-function contributions summed within cluster
    cluster_scores <- apply(estfun(fm),2, function(col) tapply(col, cluster, sum))

    # finite-sample correction applied to the sandwich estimate
    correction <- (n_clusters/(n_clusters-1))*((n_obs-1)/(n_obs-n_coef))
    adjusted_vcov <- correction*sandwich(fm, meat=crossprod(cluster_scores)/n_obs)

    coeftest(fm, adjusted_vcov)
  })
}

############################################################

# Cross-tabulate sophistication level against the valid_primary and
# valid_general indicators. Rows are picked in the order c(1,3,2); going by
# the recorded output below this yields H, M, L.
tab_primary <- table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),]
tab_general <- table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),]

tab_primary
tab_general

# share of each level within the second column of the primary table
tab_primary[,2]/sum(tab_primary[,2])
#H         M         L
#0.4748879 0.3721973 0.1529148

# share of each level within the first column of the primary table
tab_primary[,1]/sum(tab_primary[,1])
#H         M         L
#0.2746807 0.3492111 0.3761082

# same two breakdowns for the general table
tab_general[,2]/sum(tab_general[,2])
#H         M         L
#0.4033544 0.3656860 0.2309596

tab_general[,1]/sum(tab_general[,1])
#H         M         L
#0.2705793 0.3475610 0.3818598

############################################################
############################################################
###Stratify by H,M,L overall

# Keep the medium-sophistication ('M') rows from conjoints 3 and 8; pty marks
# which of the two conjoints a row belongs to (0 for conjoint 3, 1 otherwise).
vote <- subset(candidate_matrix,candidate_matrix$sophisticated_level=='M' & (candidate_matrix$conjoints==3|candidate_matrix$conjoints==8))
vote$pty <- ifelse(vote$conjoints==3,0,1)

# Split by party id: 1 = Republican, -1 = Democrat, and 0 = independent
# (following the -1/0/1 coding sketched in the commented-out pid3clean line
# above). The independent subset previously reused the Republican filter
# (pid3==1), making the "independent" model a second copy of the Republican
# one. Independents are dropped again before plotting.
reps <- subset(vote, vote$pid3==1)
dems <- subset(vote, vote$pid3==-1)
inds <- subset(vote, vote$pid3==0)


# Linear probability models of candidate choice, fitted separately for
# Republicans, Democrats and independents with the same specification.
#
# dv_choice is the vote choice variable.
# pty is kept in the formula for the independents' model; its coefficient row
# is removed again when the results are combined (for partisans it presumably
# does not vary and comes out NA).
#
# The formula is written once and each model is fitted with data= rather than
# attach()/detach(), so model variables always come from the subset being
# fitted and cannot be shadowed by a same-named global object.
primary_formula <- dv_choice~
pty + g_female+
re_black+re_hispanic+
r_catholic+r_evangelical+r_protestant+
o_ceo+o_citycouncil+o_factoryforeman+o_farmer+o_usarmymajor+o_politicalstaffer+o_smallbizowner+o_stateleg+o_teacher+
p_compassionate+p_empathetic+p_inspiring+p_intelligent+p_knowledgeable+p_moral+p_strongleader+
e_business+e_christian+e_civilrights+e_energy+e_environment+e_guncontrol+e_gunrights+e_laborunions+e_reproductive+e_taxreform+e_teaparty+e_veterans+
rec_refuse+rec_secure+rec_stand+rec_work+
i_raisetaxes+i_cuttaxes+i_lgbt+i_marriage+i_drilling+i_need+i_govabuse+i_righttochoose+i_gunrights+i_unfairtrade+i_unbornlives+i_citizenship+i_reducemilitary+i_policing+i_co2emissions+i_bordersecurity+i_guncontrol+i_strengthenmilitary+i_criminals

# Each fit is weighted by 1/wt and then tested with standard errors clustered
# on respondent.
primary_elec_reps <- lm(primary_formula, data=reps, weights=1/wt)
primary_elec_reps_clse <- clse.f(reps,primary_elec_reps,reps$respondent)

primary_elec_dems <- lm(primary_formula, data=dems, weights=1/wt)
primary_elec_dems_clse <- clse.f(dems,primary_elec_dems,dems$respondent)

primary_elec_inds <- lm(primary_formula, data=inds, weights=1/wt)
primary_elec_inds_clse <- clse.f(inds,primary_elec_inds,inds$respondent)

#Now mashing it all together for ggplot

# Point estimates side by side (Democrats, independents, Republicans), with
# the pty coefficient (row 2) removed.
coef_by_party <- cbind(primary_elec_dems$coefficients,
                       primary_elec_inds$coefficients,
                       primary_elec_reps$coefficients)[-c(2),]
coef_terms <- rownames(coef_by_party)

# Clustered standard errors, looked up by term name. A term missing from a
# coefficient-test table gets NA.
clse_dems <- primary_elec_dems_clse[,2]
clse_inds <- primary_elec_inds_clse[,2]
clse_reps <- primary_elec_reps_clse[,2]

results_matrix <- cbind(
	coef_by_party[,1], unname(clse_dems[match(coef_terms,names(clse_dems))]),
	coef_by_party[,2], unname(clse_inds[match(coef_terms,names(clse_inds))]),
	coef_by_party[,3], unname(clse_reps[match(coef_terms,names(clse_reps))])
)
rownames(results_matrix) <- coef_terms
colnames(results_matrix) <- c("d_estimate","d_se","i_estimate","i_se","r_estimate","r_se")

# Drop the first row (the intercept in an lm fit), leaving the 57 attribute
# terms, then carry the term names along as a leading column with plain
# numeric row names.
results_matrix <- results_matrix[2:58,]
results_matrix_withnames <- cbind(var=rownames(results_matrix),results_matrix)
rownames(results_matrix_withnames) <- seq_len(57)

#results_matrix_withnames[is.na(results_matrix_withnames)]<- 0
##Now inserting rows for the omitted levels

# Omitted (reference) levels that get a placeholder row below: male, white,
# no religion, attorney, decent, newspaper endorsements, record = helping
# constituents, issue = free trade. (Raising taxes is an estimated term in
# the models; its placeholder is commented out.)

# Each placeholder is a name followed by six zeros, one per estimate/SE column.
g_male <- c("g_male",rep(0,6))
re_white <- c("re_white",rep(0,6))
r_none <- c("r_none",rep(0,6))
o_attorney <- c("o_attorney",rep(0,6))
p_decent <- c("p_decent",rep(0,6))
e_newspapers <- c("e_newspapers",rep(0,6))
rec_help <- c("rec_help",rep(0,6))
#i_raisetaxes <- c("i1_raisetaxes",rep(0,6))
i_freetrade <- c("i1_freetrade",rep(0,6))

# Interleave: each placeholder row goes directly in front of the estimated
# rows of its attribute. The row ranges are hard-coded positions within the
# 57-row results_matrix_withnames.
full_matrix <- rbind(
g_male,
results_matrix_withnames[1,],
re_white,
results_matrix_withnames[2:3,],
r_none,
results_matrix_withnames[4:6,],
o_attorney,
results_matrix_withnames[7:15,],
p_decent,
results_matrix_withnames[16:22,],
e_newspapers,
results_matrix_withnames[23:34,],
rec_help,
results_matrix_withnames[35:38,],
i_freetrade,
results_matrix_withnames[39:57,]
)

# Move the name column into the row names, then convert the remaining
# (character) columns back to numbers; apply() drops the row names, so they
# are restored afterwards.
rownames(full_matrix) <- full_matrix[,1]
full_matrix <- full_matrix[,2:ncol(full_matrix)]
full_matrix_clean <- apply(full_matrix,2,as.numeric)
rownames(full_matrix_clean) <- rownames(full_matrix)

# Display labels in the form "<attribute> - <level>". They are paired with
# the rows of full_matrix_clean by position when core_for_ggplot is built, so
# the order here has to follow the row order of that matrix.
labels <- c(
"Gender - Male",
"Gender - Female",
"Race - White",
"Race - Black",
"Race - Hispanic",
"Religion - None",
"Religion - Catholic",
"Religion - Evangelical Protestant",
"Religion - Protestant",
"Occupation - Attorney",
"Occupation - CEO",
"Occupation - City Council Member",
"Occupation - Factory Foreman",
"Occupation - Farmer",
"Occupation - Former US Army Major",
"Occupation - Political Staffer",
"Occupation - Small Business Owner",
"Occupation - State Legislator",
"Occupation - Teacher",
"Personality - Decent",
"Personality - Compassionate",
"Personality - Empathetic",
"Personality - Inspiring",
"Personality - Intelligent",
"Personality - Knowledgeable",
"Personality - Moral",
"Personality - Strong Leader",
"Endorsements - Major area newspapers",
"Endorsements - Business groups",
"Endorsements - Christian groups",
"Endorsements - Civil rights groups",
"Endorsements - Energy groups",
"Endorsements - Environmental groups",
"Endorsements - Gun control groups",
"Endorsements - Gun rights groups",
"Endorsements - Labor unions",
"Endorsements - Reproductive rights groups",
"Endorsements - Tax reform groups",
"Endorsements - Tea Party groups",
"Endorsements - Veterans groups",
"Record - Help my constituents get the benefits they deserve",
"Record - Refuse to compromise my principles even when it means taking on my party",
"Record - Secure appointment to a powerful legislative committee",
"Record - Stand with my party to do what's right",
"Record - Work across the aisle to get things done",
"Issue - Promote expanding free trade agreements",
"Issue - Raise taxes on those making more than $250,000 a year",
"Issue - Cut taxes on income and capital gains for all",
"Issue - Defend the rights of LGBT individuals",
"Issue - Defend traditional marriage and religious beliefs",
"Issue - Expand domestic oil and gas production through drilling",
"Issue - Expand government and unemployment assistance for those in need",
"Issue - Prevent and prosecute abuse of government assistance programs",
"Issue - Protect a woman's right to choose",
"Issue - Protect gun owners' rights to defend themselves and others",
"Issue - Protect jobs and industry from unfair foreign trade",
"Issue - Protect the lives of the unborn",
"Issue - Provide a path to citizenship for undocumented immigrants",
"Issue - Reduce the size of military and number of military bases",
"Issue - Reform policing and stop racial profiling",
"Issue - Regulate CO2 emissions to combat global warming",
"Issue - Strengthen border security to stop illegal immigration",
"Issue - Strengthen gun control through commonsense restrictions",
"Issue - Strengthen our military and national defense",
"Issue - Toughen sentences and penalties for criminals")

# Pair every display label with its row of estimates. iv_order keeps the
# labels as a factor whose levels follow the current row order; reOrder()
# matches on it below.
core_for_ggplot <- data.frame(labels = labels, full_matrix_clean)
core_for_ggplot$iv_order <- factor(
  core_for_ggplot$labels,
  levels = as.character(core_for_ggplot$labels)
)

# Labels of the omitted (reference) level of each attribute.
omits <- c(
  "Gender - Male",
  "Race - White",
  "Religion - None",
  "Occupation - Attorney",
  "Personality - Decent",
  "Endorsements - Major area newspapers",
  "Record - Help my constituents get the benefits they deserve",
  "Issue - Promote expanding free trade agreements"
)

# Attribute group = the text in front of the " - " separator of each label.
label.groups <- str_sub(labels, 1, str_locate(labels, pattern = '-')[, 1] - 2)
# Column(s) whose name contains 'i_est' (the independent estimate).
o.column <- grep('i_est', names(core_for_ggplot))

# The in-script sort is disabled; row order comes from the saved file below.
#core_for_ggplot=lableOrder(xmat=core_for_ggplot,labels,label.groups,omits,o.column)

#write.csv(core_for_ggplot,"csv/choice_primary_core_for_ggplot.csv")

###########################################
###Then, plot those models

# Within-attribute ordering is taken from a previously saved ordering file
# instead of being recomputed here.
#core_for_ggplot <- read.csv("csv/choice_primary_core_for_ggplot.csv")
o.order <- read.csv("data/core_for_ggplot_global.csv")$iv_order

core_for_ggplot <- reOrder(x = core_for_ggplot, o = o.order)



# augment w/ scores bivariate coefficient

# ideology
#primary_elec_dems_ideo <- lm(data=dems,dv_choice~
#pty + scores)
#primary_elec_dems_ideo_clse <- clse.f(dems,primary_elec_dems_ideo,dems$respondent)

#primary_elec_reps_ideo <- lm(data=reps,dv_choice~
#pty + scores)
#primary_elec_reps_ideo_clse <- clse.f(reps,primary_elec_reps_ideo,reps$respondent)

#primary_elec_inds_ideo <- lm(data=inds,dv_choice~
#pty + scores)
#primary_elec_inds_ideo_clse <- clse.f(inds,primary_elec_inds_ideo,inds$respondent)

#core_for_ggplot[,1]=as.character(core_for_ggplot[,1])
#core_for_ggplot[,2]=as.character(core_for_ggplot[,2])
#core_for_ggplot[,9]=as.character(core_for_ggplot[,9])
#core_for_ggplot=rbind(core_for_ggplot,c('i_ideology','Overall Ideology Rating',
	#primary_elec_dems_ideo_clse[2,1],primary_elec_dems_ideo_clse[2,2],
	#primary_elec_inds_ideo_clse[2,1],primary_elec_inds_ideo_clse[2,2],
	#primary_elec_reps_ideo_clse[2,1],primary_elec_reps_ideo_clse[2,2],
	#'Overall Ideology Rating'))

# Stack the per-party estimates and standard errors into one long data frame
# (one row per label x party) for plotting.
#
# Columns are read explicitly from core_for_ggplot instead of via attach():
# with attach(), an object of the same name in the global environment would
# silently take precedence over the data-frame column.
stopifnot(all(
  c("d_estimate", "i_estimate", "r_estimate", "d_se", "i_se", "r_se") %in%
    names(core_for_ggplot)
))
labels <- core_for_ggplot$labels

ggplot_stacked <- data.frame(
  labels = rep(as.character(labels), times = 3),
  PID = rep(c("Democrat", "Independent", "Republican"), each = length(labels)),
  estimate = c(
    core_for_ggplot[["d_estimate"]],
    core_for_ggplot[["i_estimate"]],
    core_for_ggplot[["r_estimate"]]
  ),
  se = c(
    core_for_ggplot[["d_se"]],
    core_for_ggplot[["i_se"]],
    core_for_ggplot[["r_se"]]
  )
)

# Only the two partisan groups are plotted; drop the Independent rows.
ggplot_stacked <- ggplot_stacked[!grepl("Independent", ggplot_stacked$PID), ]

# Coerce via character so factor-typed input yields its printed values
# rather than internal level codes.
ggplot_stacked$estimate <- as.numeric(as.character(ggplot_stacked$estimate))
ggplot_stacked$se <- as.numeric(as.character(ggplot_stacked$se))

#exclude overall ideology
#ggplot_stacked=ggplot_stacked[-c(grep(as.character(ggplot_stacked[,1]),pattern='Overall')),]
ggplot_stacked$labels <- as.factor(as.character(ggplot_stacked$labels))
#write.csv(ggplot_stacked,"csv/choice_primary_ggplot_stacked.csv")

# Attached exactly once. Nothing below needs it, but a later
# detach(ggplot_stacked) in this script expects the frame to be on the search
# path; attaching twice (as before) left a stale copy behind after that detach.
attach(ggplot_stacked)

# Unique labels in order of first appearance; used as the axis order.
labels <- unique(ggplot_stacked$labels)

pd <- position_dodge(.5)
# 95% interval bounds (normal approximation), kept as free-standing vectors
# that the plotting code refers to by name.
minci <- ggplot_stacked$estimate - (1.96 * ggplot_stacked$se)
maxci <- ggplot_stacked$estimate + (1.96 * ggplot_stacked$se)

# Append the number of distinct respondents to each party's legend entry.
ggplot_stacked$PID <- gsub(
  pattern = 'Democrat',
  replacement = paste0('Democrat (N=', length(unique(dems$respondent)), ')'),
  x = ggplot_stacked$PID
)
ggplot_stacked$PID <- gsub(
  pattern = 'Republican',
  replacement = paste0('Republican (N=', length(unique(reps$respondent)), ')'),
  x = ggplot_stacked$PID
)

# Per-party estimates with 95% intervals, written to the "MedKnow" PDF.
#
# The plot is built first and then print()ed explicitly inside the pdf device.
# A bare ggplot() call at top level is only rendered through auto-printing,
# which does not happen when the script is run via source(); the PDF would
# then be written without any page.
fig_med_know <- ggplot(ggplot_stacked,
    aes(x = labels, y = estimate, colour = PID)) +
  scale_color_manual(values = c("blue4", "firebrick1")) +
  # Reverse the label order so the first label ends up at the top after the flip.
  scale_x_discrete(limits = rev(labels)) +
  geom_hline(yintercept = 0, size = 0.5, color = "gray80") +
  # minci / maxci are the free-standing interval vectors computed above.
  geom_errorbar(width = .1, aes(ymin = minci, ymax = maxci), position = pd) +
  geom_point(shape = 21, size = 2, position = pd) +
  ylim(-.81, .81) +
  theme_bw() +
  theme(
    axis.text.y = element_text(hjust = 0, color = "gray40"),
    legend.position = c(-0.3, 0.935),
    legend.title = element_text(),
    legend.text = element_text(size = 10)
  ) +
  # Dashed separators at fixed axis positions.
  # NOTE(review): presumably these fall between attribute groups; the
  # positions are hard-coded, so confirm them if the label set changes.
  geom_vline(
    xintercept = c(20.5, 25.5, 38.5, 46.5, 56.5, 60.5, 63.5),
    size = 0.5, linetype = "dashed"
  ) +
  coord_flip() +
  labs(x = "Experimentally Manipulated Variable", y = "Change Pr(Vote Choice)")

pdf("appendix/figures/choice_primary_MedKnow_sophisticated.pdf", width=9, height=14, pointsize=12)
print(fig_med_know)
dev.off()

# Keep this level's stacked estimates for the combined figures below.
ggplot_stackedM <- ggplot_stacked

#ggplot_stackedH,ggplot_stackedM,ggplot_stackedL

###########################################
#  Ideology and Electability in Primary Voting Behavior
###########################################
#  - code by S. Goggin (Aug 10, 2017)
#  - edited by J. Henderson (Aug 16, 2017)

# short and combined version with party separate

# Tag each knowledge level's rows with a legend label: a level prefix followed
# by that row's party label (paste() joins them with a single space).
ggplot_stackedH$Consistency <- paste('I. High Sophisticated', ggplot_stackedH$PID)
ggplot_stackedM$Consistency <- paste('II. Med Sophisticated', ggplot_stackedM$PID)
ggplot_stackedL$Consistency <- paste('III. Low Sophisticated', ggplot_stackedL$PID)

# Row mask for the "short" figures: TRUE where the label mentions any of the
# listed attribute groups. It is computed from the High frame only.
# NOTE(review): the mask is later applied to the Med and Low frames as well,
# which assumes all three share the same row order - confirm.
iq <- Reduce(
  `|`,
  lapply(
    c('Gender', 'Race', 'Religion', 'Endorsements', 'Issue'),
    grepl,
    x = ggplot_stackedH$labels
  )
)

# merge by PID

# Release the data frame attached while building the single-level figure.
detach(ggplot_stacked)

# Combined "short" figure: High, Med and Low rows selected by iq, stacked.
ggplot_stacked <- rbind(ggplot_stackedH[iq, ], ggplot_stackedM[iq, ], ggplot_stackedL[iq, ])
ggplot_stacked$labels <- as.character(ggplot_stacked$labels)
# Remove the characters N ( ) = digits and spaces from PID, so that
# "Democrat (N=...)" collapses to a bare party name usable as a facet.
ggplot_stacked$PID <- gsub(pattern = '[N()0-9= ]', replacement = '', x = ggplot_stacked$PID)

ggplot_stacked$labels <- as.factor(ggplot_stacked$labels)
# Unique labels in order of first appearance; used as the axis order.
labels <- unique(ggplot_stacked$labels)
pd <- position_dodge(.5)
# 95% interval bounds (normal approximation), stored as columns.
ggplot_stacked$minci <- ggplot_stacked$estimate - (1.96 * ggplot_stacked$se)
ggplot_stacked$maxci <- ggplot_stacked$estimate + (1.96 * ggplot_stacked$se)

# No attach()/detach() around the plot: every aesthetic is a column of the
# data passed to ggplot(), and `labels` / `pd` are ordinary globals.

#n1D=length(unique(vote$respondent[which(vote$invoted=='invote' & vote$pid3==-1 & vote$crossp50==1)])) # inconsistent
#n0D=length(unique(vote$respondent[which(vote$invoted=='invote' & vote$pid3==-1 & vote$crossp50==0)])) # consistent
#n1R=length(unique(vote$respondent[which(vote$invoted=='invote' & vote$pid3==1 & vote$crossp50==1)]))
#n0R=length(unique(vote$respondent[which(vote$invoted=='invote' & vote$pid3==1 & vote$crossp50==0)]))

#ggplot_stacked$Consistency=gsub(ggplot_stacked$Consistency,pattern='Inconsistent Democrat',replace=paste(sep='','Inconsistent Democrat (N=',n1D,')'))
#ggplot_stacked$Consistency=gsub(ggplot_stacked$Consistency,pattern='Consistent Democrat',replace=paste(sep='','Consistent Democrat (N=',n0D,')'))
#ggplot_stacked$Consistency=gsub(ggplot_stacked$Consistency,pattern='Inconsistent Republican',replace=paste(sep='','Inconsistent Republican (N=',n1R,')'))
#ggplot_stacked$Consistency=gsub(ggplot_stacked$Consistency,pattern='Consistent Republican',replace=paste(sep='','Consistent Republican (N=',n0R,')'))

# Build the plot, then print() it explicitly: a bare top-level ggplot() call is
# not rendered when the script is run via source(), leaving the PDF empty.
fig_high_med_low <- ggplot(ggplot_stacked,
    aes(x = labels, y = estimate, colour = Consistency)) +
  scale_color_manual(values = c("#191970", "darkred", "steelblue3", "firebrick1", "lightsteelblue3", "rosybrown")) +
  scale_x_discrete(limits = rev(labels)) +
  facet_wrap(~PID, ncol = 2) +
  geom_hline(yintercept = 0, size = 0.5, color = "gray80") +
  geom_errorbar(width = .1, aes(ymin = minci, ymax = maxci), position = pd) +
  geom_point(shape = 21, size = 2, position = pd) +
  ylim(-0.34, 0.46) +
  theme_bw() +
  theme(
    axis.text.y = element_text(hjust = 0, color = "gray40"),
    legend.position = c(-0.3, 0.935),
    legend.title = element_text(),
    legend.text = element_text(size = 10)
  ) +
  # Dashed separators at fixed, hard-coded axis positions.
  geom_vline(xintercept = c(20.5, 33.5, 37.5, 40.5), size = 0.5, linetype = "dashed") +
  coord_flip() +
  # NOTE(review): the y label says "Difference in Difference", while the
  # single-level figure uses "Change Pr(Vote Choice)" - confirm it is intended.
  labs(x = "Experimentally Manipulated Variable", y = "Difference in Difference in Pr(Vote Choice)")

pdf("appendix/figures/choice_primary_High_Med_Low_Know_sophisticated_short.pdf", width=10, height=10, pointsize=12)
print(fig_high_med_low)
dev.off()

# Combined "short" figure: High and Med rows selected by iq, stacked.
ggplot_stacked <- rbind(ggplot_stackedH[iq, ], ggplot_stackedM[iq, ])
ggplot_stacked$labels <- as.character(ggplot_stacked$labels)
# Remove the characters N ( ) = digits and spaces from PID, so that
# "Democrat (N=...)" collapses to a bare party name usable as a facet.
ggplot_stacked$PID <- gsub(pattern = '[N()0-9= ]', replacement = '', x = ggplot_stacked$PID)

ggplot_stacked$labels <- as.factor(ggplot_stacked$labels)
# Unique labels in order of first appearance; used as the axis order.
labels <- unique(ggplot_stacked$labels)
pd <- position_dodge(.5)
# 95% interval bounds (normal approximation), stored as columns.
ggplot_stacked$minci <- ggplot_stacked$estimate - (1.96 * ggplot_stacked$se)
ggplot_stacked$maxci <- ggplot_stacked$estimate + (1.96 * ggplot_stacked$se)

# No attach()/detach() around the plot: every aesthetic is a column of the
# data passed to ggplot(), and `labels` / `pd` are ordinary globals.

# Build the plot, then print() it explicitly: a bare top-level ggplot() call is
# not rendered when the script is run via source(), leaving the PDF empty.
fig_high_med <- ggplot(ggplot_stacked,
    aes(x = labels, y = estimate, colour = Consistency)) +
  scale_color_manual(values = c("#191970", "darkred", "lightsteelblue3", "rosybrown")) +
  scale_x_discrete(limits = rev(labels)) +
  facet_wrap(~PID, ncol = 2) +
  geom_hline(yintercept = 0, size = 0.5, color = "gray80") +
  geom_errorbar(width = .1, aes(ymin = minci, ymax = maxci), position = pd) +
  geom_point(shape = 21, size = 2, position = pd) +
  ylim(-0.34, 0.46) +
  theme_bw() +
  theme(
    axis.text.y = element_text(hjust = 0, color = "gray40"),
    legend.position = c(-0.3, 0.935),
    legend.title = element_text(),
    legend.text = element_text(size = 10)
  ) +
  # Dashed separators at fixed, hard-coded axis positions.
  geom_vline(xintercept = c(20.5, 33.5, 37.5, 40.5), size = 0.5, linetype = "dashed") +
  coord_flip() +
  # NOTE(review): the y label says "Difference in Difference", while the
  # single-level figure uses "Change Pr(Vote Choice)" - confirm it is intended.
  labs(x = "Experimentally Manipulated Variable", y = "Difference in Difference in Pr(Vote Choice)")

pdf("appendix/figures/choice_primary_High_Med_Know_sophisticated_short.pdf", width=10, height=10, pointsize=12)
print(fig_high_med)
dev.off()

# Combined "short" figure: High and Low rows selected by iq, stacked.
ggplot_stacked <- rbind(ggplot_stackedH[iq, ], ggplot_stackedL[iq, ])
ggplot_stacked$labels <- as.character(ggplot_stacked$labels)
# Remove the characters N ( ) = digits and spaces from PID, so that
# "Democrat (N=...)" collapses to a bare party name usable as a facet.
ggplot_stacked$PID <- gsub(pattern = '[N()0-9= ]', replacement = '', x = ggplot_stacked$PID)

ggplot_stacked$labels <- as.factor(ggplot_stacked$labels)
# Unique labels in order of first appearance; used as the axis order.
labels <- unique(ggplot_stacked$labels)
pd <- position_dodge(.5)
# 95% interval bounds (normal approximation), stored as columns.
ggplot_stacked$minci <- ggplot_stacked$estimate - (1.96 * ggplot_stacked$se)
ggplot_stacked$maxci <- ggplot_stacked$estimate + (1.96 * ggplot_stacked$se)

# No attach()/detach() around the plot: every aesthetic is a column of the
# data passed to ggplot(), and `labels` / `pd` are ordinary globals.

# Build the plot, then print() it explicitly: a bare top-level ggplot() call is
# not rendered when the script is run via source(), leaving the PDF empty.
fig_high_low <- ggplot(ggplot_stacked,
    aes(x = labels, y = estimate, colour = Consistency)) +
  scale_color_manual(values = c("#191970", "darkred", "lightsteelblue3", "rosybrown")) +
  scale_x_discrete(limits = rev(labels)) +
  facet_wrap(~PID, ncol = 2) +
  geom_hline(yintercept = 0, size = 0.5, color = "gray80") +
  geom_errorbar(width = .1, aes(ymin = minci, ymax = maxci), position = pd) +
  geom_point(shape = 21, size = 2, position = pd) +
  ylim(-0.34, 0.46) +
  theme_bw() +
  theme(
    axis.text.y = element_text(hjust = 0, color = "gray40"),
    legend.position = c(-0.3, 0.935),
    legend.title = element_text(),
    legend.text = element_text(size = 10)
  ) +
  # Dashed separators at fixed, hard-coded axis positions.
  geom_vline(xintercept = c(20.5, 33.5, 37.5, 40.5), size = 0.5, linetype = "dashed") +
  coord_flip() +
  # NOTE(review): the y label says "Difference in Difference", while the
  # single-level figure uses "Change Pr(Vote Choice)" - confirm it is intended.
  labs(x = "Experimentally Manipulated Variable", y = "Difference in Difference in Pr(Vote Choice)")

pdf("appendix/figures/choice_primary_High_Low_Know_sophisticated_short.pdf", width=10, height=10, pointsize=12)
print(fig_high_low)
dev.off()

# Combined "short" figure: Med and Low rows selected by iq, stacked.
ggplot_stacked <- rbind(ggplot_stackedM[iq, ], ggplot_stackedL[iq, ])
ggplot_stacked$labels <- as.character(ggplot_stacked$labels)
# Remove the characters N ( ) = digits and spaces from PID, so that
# "Democrat (N=...)" collapses to a bare party name usable as a facet.
ggplot_stacked$PID <- gsub(pattern = '[N()0-9= ]', replacement = '', x = ggplot_stacked$PID)

ggplot_stacked$labels <- as.factor(ggplot_stacked$labels)
# Unique labels in order of first appearance; used as the axis order.
labels <- unique(ggplot_stacked$labels)
pd <- position_dodge(.5)
# 95% interval bounds (normal approximation), stored as columns.
ggplot_stacked$minci <- ggplot_stacked$estimate - (1.96 * ggplot_stacked$se)
ggplot_stacked$maxci <- ggplot_stacked$estimate + (1.96 * ggplot_stacked$se)

# NOTE(review): this attach() has no matching detach() in the visible part of
# the script and the plot below does not need it (all aesthetics are columns
# of the data passed to ggplot()). It is kept so the search path stays as any
# later code may expect - confirm and remove if nothing depends on it.
attach(ggplot_stacked)

# Build the plot, then print() it explicitly: a bare top-level ggplot() call is
# not rendered when the script is run via source(), leaving the PDF empty.
fig_med_low <- ggplot(ggplot_stacked,
    aes(x = labels, y = estimate, colour = Consistency)) +
  scale_color_manual(values = c("#191970", "darkred", "lightsteelblue3", "rosybrown")) +
  scale_x_discrete(limits = rev(labels)) +
  facet_wrap(~PID, ncol = 2) +
  geom_hline(yintercept = 0, size = 0.5, color = "gray80") +
  geom_errorbar(width = .1, aes(ymin = minci, ymax = maxci), position = pd) +
  geom_point(shape = 21, size = 2, position = pd) +
  ylim(-0.34, 0.46) +
  theme_bw() +
  theme(
    axis.text.y = element_text(hjust = 0, color = "gray40"),
    legend.position = c(-0.3, 0.935),
    legend.title = element_text(),
    legend.text = element_text(size = 10)
  ) +
  # Dashed separators at fixed, hard-coded axis positions.
  geom_vline(xintercept = c(20.5, 33.5, 37.5, 40.5), size = 0.5, linetype = "dashed") +
  coord_flip() +
  # NOTE(review): the y label says "Difference in Difference", while the
  # single-level figure uses "Change Pr(Vote Choice)" - confirm it is intended.
  labs(x = "Experimentally Manipulated Variable", y = "Difference in Difference in Pr(Vote Choice)")

pdf("appendix/figures/choice_primary_Med_Low_Know_sophisticated_short.pdf", width=10, height=10, pointsize=12)
print(fig_med_low)
dev.off()
